blob: fa7e910e7df1c7189039cf12f288c3e4eced871f [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
3"""
Martin v. Löwis00756902006-02-05 17:09:41 +00004import struct, os, time, sys
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005import binascii, cStringIO
Guido van Rossum32abe6f2000-03-31 17:30:02 +00006
7try:
Tim Peterse1190062001-01-15 03:34:38 +00008 import zlib # We may need its compression method
Guido van Rossum9c673f32001-04-10 15:37:12 +00009except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000010 zlib = None
11
Skip Montanaro40fc1602001-03-01 04:27:19 +000012__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Thomas Wouters0e3f5912006-08-11 14:57:12 +000013 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000014
Fred Drake5db246d2000-09-29 20:44:48 +000015class BadZipfile(Exception):
Guido van Rossum32abe6f2000-03-31 17:30:02 +000016 pass
Thomas Wouters0e3f5912006-08-11 14:57:12 +000017
18
19class LargeZipFile(Exception):
20 """
21 Raised when writing a zipfile, the zipfile requires ZIP64 extensions
22 and those extensions are disabled.
23 """
24
Tim Peterse1190062001-01-15 03:34:38 +000025error = BadZipfile # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000026
Thomas Wouters0e3f5912006-08-11 14:57:12 +000027ZIP64_LIMIT= (1 << 31) - 1
28
Guido van Rossum32abe6f2000-03-31 17:30:02 +000029# constants for Zip file compression methods
30ZIP_STORED = 0
31ZIP_DEFLATED = 8
32# Other ZIP compression methods not supported
33
34# Here are some struct module formats for reading headers
35structEndArchive = "<4s4H2lH" # 9 items, end of archive, 22 bytes
36stringEndArchive = "PK\005\006" # magic number for end of archive record
Brett Cannonff450f72004-07-10 19:09:20 +000037structCentralDir = "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +000038stringCentralDir = "PK\001\002" # magic number for central directory
Brett Cannonff450f72004-07-10 19:09:20 +000039structFileHeader = "<4s2B4HlLL2H" # 12 items, file header record, 30 bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +000040stringFileHeader = "PK\003\004" # magic number for file header
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes
42stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
43structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes
44stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header
45
Guido van Rossum32abe6f2000-03-31 17:30:02 +000046
Fred Drake3e038e52001-02-28 17:56:26 +000047# indexes of entries in the central directory structure
48_CD_SIGNATURE = 0
49_CD_CREATE_VERSION = 1
50_CD_CREATE_SYSTEM = 2
51_CD_EXTRACT_VERSION = 3
52_CD_EXTRACT_SYSTEM = 4 # is this meaningful?
53_CD_FLAG_BITS = 5
54_CD_COMPRESS_TYPE = 6
55_CD_TIME = 7
56_CD_DATE = 8
57_CD_CRC = 9
58_CD_COMPRESSED_SIZE = 10
59_CD_UNCOMPRESSED_SIZE = 11
60_CD_FILENAME_LENGTH = 12
61_CD_EXTRA_FIELD_LENGTH = 13
62_CD_COMMENT_LENGTH = 14
63_CD_DISK_NUMBER_START = 15
64_CD_INTERNAL_FILE_ATTRIBUTES = 16
65_CD_EXTERNAL_FILE_ATTRIBUTES = 17
66_CD_LOCAL_HEADER_OFFSET = 18
67
68# indexes of entries in the local file header structure
69_FH_SIGNATURE = 0
70_FH_EXTRACT_VERSION = 1
71_FH_EXTRACT_SYSTEM = 2 # is this meaningful?
72_FH_GENERAL_PURPOSE_FLAG_BITS = 3
73_FH_COMPRESSION_METHOD = 4
74_FH_LAST_MOD_TIME = 5
75_FH_LAST_MOD_DATE = 6
76_FH_CRC = 7
77_FH_COMPRESSED_SIZE = 8
78_FH_UNCOMPRESSED_SIZE = 9
79_FH_FILENAME_LENGTH = 10
80_FH_EXTRA_FIELD_LENGTH = 11
81
Guido van Rossum32abe6f2000-03-31 17:30:02 +000082def is_zipfile(filename):
Martin v. Löwis6f6873b2002-10-13 13:54:50 +000083 """Quickly see if file is a ZIP file by checking the magic number."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +000084 try:
85 fpin = open(filename, "rb")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +000086 endrec = _EndRecData(fpin)
Guido van Rossum32abe6f2000-03-31 17:30:02 +000087 fpin.close()
Martin v. Löwis6f6873b2002-10-13 13:54:50 +000088 if endrec:
Guido van Rossum8ca162f2002-04-07 06:36:23 +000089 return True # file has correct magic number
Fred Drake7e473802001-05-11 19:52:57 +000090 except IOError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000091 pass
Guido van Rossum8ca162f2002-04-07 06:36:23 +000092 return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +000093
Thomas Wouters0e3f5912006-08-11 14:57:12 +000094def _EndRecData64(fpin, offset, endrec):
95 """
96 Read the ZIP64 end-of-archive records and use that to update endrec
97 """
98 locatorSize = struct.calcsize(structEndArchive64Locator)
99 fpin.seek(offset - locatorSize, 2)
100 data = fpin.read(locatorSize)
101 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
102 if sig != stringEndArchive64Locator:
103 return endrec
104
105 if diskno != 0 or disks != 1:
106 raise BadZipfile("zipfiles that span multiple disks are not supported")
107
108 # Assume no 'zip64 extensible data'
109 endArchiveSize = struct.calcsize(structEndArchive64)
110 fpin.seek(offset - locatorSize - endArchiveSize, 2)
111 data = fpin.read(endArchiveSize)
112 sig, sz, create_version, read_version, disk_num, disk_dir, \
113 dircount, dircount2, dirsize, diroffset = \
114 struct.unpack(structEndArchive64, data)
115 if sig != stringEndArchive64:
116 return endrec
117
118 # Update the original endrec using data from the ZIP64 record
119 endrec[1] = disk_num
120 endrec[2] = disk_dir
121 endrec[3] = dircount
122 endrec[4] = dircount2
123 endrec[5] = dirsize
124 endrec[6] = diroffset
125 return endrec
126
127
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000128def _EndRecData(fpin):
129 """Return data from the "End of Central Directory" record, or None.
130
131 The data is a list of the nine items in the ZIP "End of central dir"
132 record followed by a tenth item, the file seek offset of this record."""
133 fpin.seek(-22, 2) # Assume no archive comment.
134 filesize = fpin.tell() + 22 # Get file size
135 data = fpin.read()
136 if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
137 endrec = struct.unpack(structEndArchive, data)
138 endrec = list(endrec)
139 endrec.append("") # Append the archive comment
140 endrec.append(filesize - 22) # Append the record start offset
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000141 if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
142 return _EndRecData64(fpin, -22, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000143 return endrec
144 # Search the last END_BLOCK bytes of the file for the record signature.
145 # The comment is appended to the ZIP file and has a 16 bit length.
146 # So the comment may be up to 64K long. We limit the search for the
147 # signature to a few Kbytes at the end of the file for efficiency.
148 # also, the signature must not appear in the comment.
149 END_BLOCK = min(filesize, 1024 * 4)
150 fpin.seek(filesize - END_BLOCK, 0)
151 data = fpin.read()
152 start = data.rfind(stringEndArchive)
153 if start >= 0: # Correct signature string was found
154 endrec = struct.unpack(structEndArchive, data[start:start+22])
155 endrec = list(endrec)
156 comment = data[start+22:]
157 if endrec[7] == len(comment): # Comment length checks out
158 # Append the archive comment and start offset
159 endrec.append(comment)
160 endrec.append(filesize - END_BLOCK + start)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000161 if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
162 return _EndRecData64(fpin, - END_BLOCK + start, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000163 return endrec
164 return # Error, return None
165
Fred Drake484d7352000-10-02 21:14:52 +0000166
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000167class ZipInfo (object):
Fred Drake484d7352000-10-02 21:14:52 +0000168 """Class with attributes describing each file in the ZIP archive."""
169
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000170 __slots__ = (
171 'orig_filename',
172 'filename',
173 'date_time',
174 'compress_type',
175 'comment',
176 'extra',
177 'create_system',
178 'create_version',
179 'extract_version',
180 'reserved',
181 'flag_bits',
182 'volume',
183 'internal_attr',
184 'external_attr',
185 'header_offset',
186 'CRC',
187 'compress_size',
188 'file_size',
189 )
190
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000191 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
Greg Ward8e36d282003-06-18 00:53:06 +0000192 self.orig_filename = filename # Original file name in archive
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000193
194 # Terminate the file name at the first null byte. Null bytes in file
195 # names are used as tricks by viruses in archives.
Greg Ward8e36d282003-06-18 00:53:06 +0000196 null_byte = filename.find(chr(0))
197 if null_byte >= 0:
198 filename = filename[0:null_byte]
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000199 # This is used to ensure paths in generated ZIP files always use
200 # forward slashes as the directory separator, as required by the
201 # ZIP format specification.
202 if os.sep != "/" and os.sep in filename:
Greg Ward8e36d282003-06-18 00:53:06 +0000203 filename = filename.replace(os.sep, "/")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000204
Greg Ward8e36d282003-06-18 00:53:06 +0000205 self.filename = filename # Normalized file name
Tim Peterse1190062001-01-15 03:34:38 +0000206 self.date_time = date_time # year, month, day, hour, min, sec
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000207 # Standard values:
Tim Peterse1190062001-01-15 03:34:38 +0000208 self.compress_type = ZIP_STORED # Type of compression for the file
209 self.comment = "" # Comment for each file
210 self.extra = "" # ZIP extra data
Martin v. Löwis00756902006-02-05 17:09:41 +0000211 if sys.platform == 'win32':
212 self.create_system = 0 # System which created ZIP archive
213 else:
214 # Assume everything else is unix-y
215 self.create_system = 3 # System which created ZIP archive
Tim Peterse1190062001-01-15 03:34:38 +0000216 self.create_version = 20 # Version which created ZIP archive
217 self.extract_version = 20 # Version needed to extract archive
218 self.reserved = 0 # Must be zero
219 self.flag_bits = 0 # ZIP flag bits
220 self.volume = 0 # Volume number of file header
221 self.internal_attr = 0 # Internal attributes
222 self.external_attr = 0 # External file attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000223 # Other attributes are set by class ZipFile:
Tim Peterse1190062001-01-15 03:34:38 +0000224 # header_offset Byte offset to the file header
Tim Peterse1190062001-01-15 03:34:38 +0000225 # CRC CRC-32 of the uncompressed file
226 # compress_size Size of the compressed file
227 # file_size Size of the uncompressed file
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000228
229 def FileHeader(self):
Fred Drake484d7352000-10-02 21:14:52 +0000230 """Return the per-file header as a string."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000231 dt = self.date_time
232 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +0000233 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000234 if self.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +0000235 # Set these to zero because we write them after the file data
236 CRC = compress_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000237 else:
Tim Peterse1190062001-01-15 03:34:38 +0000238 CRC = self.CRC
239 compress_size = self.compress_size
240 file_size = self.file_size
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000241
242 extra = self.extra
243
244 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
245 # File is larger than what fits into a 4 byte integer,
246 # fall back to the ZIP64 extension
247 fmt = '<hhqq'
248 extra = extra + struct.pack(fmt,
249 1, struct.calcsize(fmt)-4, file_size, compress_size)
250 file_size = 0xffffffff # -1
251 compress_size = 0xffffffff # -1
252 self.extract_version = max(45, self.extract_version)
253 self.create_version = max(45, self.extract_version)
254
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000255 header = struct.pack(structFileHeader, stringFileHeader,
256 self.extract_version, self.reserved, self.flag_bits,
257 self.compress_type, dostime, dosdate, CRC,
258 compress_size, file_size,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000259 len(self.filename), len(extra))
260 return header + self.filename + extra
261
262 def _decodeExtra(self):
263 # Try to decode the extra field.
264 extra = self.extra
265 unpack = struct.unpack
266 while extra:
267 tp, ln = unpack('<hh', extra[:4])
268 if tp == 1:
269 if ln >= 24:
270 counts = unpack('<qqq', extra[4:28])
271 elif ln == 16:
272 counts = unpack('<qq', extra[4:20])
273 elif ln == 8:
274 counts = unpack('<q', extra[4:12])
275 elif ln == 0:
276 counts = ()
277 else:
278 raise RuntimeError, "Corrupt extra field %s"%(ln,)
279
280 idx = 0
281
282 # ZIP64 extension (large files and/or large archives)
Guido van Rossume2a383d2007-01-15 16:59:06 +0000283 if self.file_size == -1 or self.file_size == 0xFFFFFFFF:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000284 self.file_size = counts[idx]
285 idx += 1
286
Guido van Rossume2a383d2007-01-15 16:59:06 +0000287 if self.compress_size == -1 or self.compress_size == 0xFFFFFFFF:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000288 self.compress_size = counts[idx]
289 idx += 1
290
Guido van Rossume2a383d2007-01-15 16:59:06 +0000291 if self.header_offset == -1 or self.header_offset == 0xffffffff:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000292 old = self.header_offset
293 self.header_offset = counts[idx]
294 idx+=1
295
296 extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000297
298
Thomas Wouterscf297e42007-02-23 15:07:44 +0000299class _ZipDecrypter:
300 """Class to handle decryption of files stored within a ZIP archive.
301
302 ZIP supports a password-based form of encryption. Even though known
303 plaintext attacks have been found against it, it is still useful
304 for low-level securicy.
305
306 Usage:
307 zd = _ZipDecrypter(mypwd)
308 plain_char = zd(cypher_char)
309 plain_text = map(zd, cypher_text)
310 """
311
312 def _GenerateCRCTable():
313 """Generate a CRC-32 table.
314
315 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
316 internal keys. We noticed that a direct implementation is faster than
317 relying on binascii.crc32().
318 """
319 poly = 0xedb88320
320 table = [0] * 256
321 for i in range(256):
322 crc = i
323 for j in range(8):
324 if crc & 1:
325 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
326 else:
327 crc = ((crc >> 1) & 0x7FFFFFFF)
328 table[i] = crc
329 return table
330 crctable = _GenerateCRCTable()
331
332 def _crc32(self, ch, crc):
333 """Compute the CRC32 primitive on one byte."""
334 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
335
336 def __init__(self, pwd):
337 self.key0 = 305419896
338 self.key1 = 591751049
339 self.key2 = 878082192
340 for p in pwd:
341 self._UpdateKeys(p)
342
343 def _UpdateKeys(self, c):
344 self.key0 = self._crc32(c, self.key0)
345 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
346 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
347 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
348
349 def __call__(self, c):
350 """Decrypt a single character."""
Guido van Rossum98f97462007-04-13 03:31:13 +0000351 # XXX When this is called with a byte instead of a char, ord()
352 # isn't needed. Don't die in that case. In the future we should
353 # just leave this out, once we're always using bytes.
354 try:
355 c = ord(c)
356 except TypeError:
357 pass
Thomas Wouterscf297e42007-02-23 15:07:44 +0000358 k = self.key2 | 2
359 c = c ^ (((k * (k^1)) >> 8) & 255)
360 c = chr(c)
361 self._UpdateKeys(c)
362 return c
363
Guido van Rossumd8faa362007-04-27 19:54:29 +0000364class ZipExtFile:
365 """File-like object for reading an archive member.
366 Is returned by ZipFile.open().
367 """
368
369 def __init__(self, fileobj, zipinfo, decrypt=None):
370 self.fileobj = fileobj
371 self.decrypter = decrypt
372 self.bytes_read = 0
373 self.rawbuffer = ''
374 self.readbuffer = ''
375 self.linebuffer = ''
376 self.eof = False
377 self.univ_newlines = False
378 self.nlSeps = ("\n", )
379 self.lastdiscard = ''
380
381 self.compress_type = zipinfo.compress_type
382 self.compress_size = zipinfo.compress_size
383
384 self.closed = False
385 self.mode = "r"
386 self.name = zipinfo.filename
387
388 # read from compressed files in 64k blocks
389 self.compreadsize = 64*1024
390 if self.compress_type == ZIP_DEFLATED:
391 self.dc = zlib.decompressobj(-15)
392
393 def set_univ_newlines(self, univ_newlines):
394 self.univ_newlines = univ_newlines
395
396 # pick line separator char(s) based on universal newlines flag
397 self.nlSeps = ("\n", )
398 if self.univ_newlines:
399 self.nlSeps = ("\r\n", "\r", "\n")
400
401 def __iter__(self):
402 return self
403
404 def __next__(self):
405 nextline = self.readline()
406 if not nextline:
407 raise StopIteration()
408
409 return nextline
410
411 def close(self):
412 self.closed = True
413
414 def _checkfornewline(self):
415 nl, nllen = -1, -1
416 if self.linebuffer:
417 # ugly check for cases where half of an \r\n pair was
418 # read on the last pass, and the \r was discarded. In this
419 # case we just throw away the \n at the start of the buffer.
420 if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
421 self.linebuffer = self.linebuffer[1:]
422
423 for sep in self.nlSeps:
424 nl = self.linebuffer.find(sep)
425 if nl >= 0:
426 nllen = len(sep)
427 return nl, nllen
428
429 return nl, nllen
430
431 def readline(self, size = -1):
432 """Read a line with approx. size. If size is negative,
433 read a whole line.
434 """
435 if size < 0:
436 size = sys.maxint
437 elif size == 0:
438 return ''
439
440 # check for a newline already in buffer
441 nl, nllen = self._checkfornewline()
442
443 if nl >= 0:
444 # the next line was already in the buffer
445 nl = min(nl, size)
446 else:
447 # no line break in buffer - try to read more
448 size -= len(self.linebuffer)
449 while nl < 0 and size > 0:
450 buf = self.read(min(size, 100))
451 if not buf:
452 break
453 self.linebuffer += buf
454 size -= len(buf)
455
456 # check for a newline in buffer
457 nl, nllen = self._checkfornewline()
458
459 # we either ran out of bytes in the file, or
460 # met the specified size limit without finding a newline,
461 # so return current buffer
462 if nl < 0:
463 s = self.linebuffer
464 self.linebuffer = ''
465 return s
466
467 buf = self.linebuffer[:nl]
468 self.lastdiscard = self.linebuffer[nl:nl + nllen]
469 self.linebuffer = self.linebuffer[nl + nllen:]
470
471 # line is always returned with \n as newline char (except possibly
472 # for a final incomplete line in the file, which is handled above).
473 return buf + "\n"
474
475 def readlines(self, sizehint = -1):
476 """Return a list with all (following) lines. The sizehint parameter
477 is ignored in this implementation.
478 """
479 result = []
480 while True:
481 line = self.readline()
482 if not line: break
483 result.append(line)
484 return result
485
486 def read(self, size = None):
487 # act like file() obj and return empty string if size is 0
488 if size == 0:
489 return ''
490
491 # determine read size
492 bytesToRead = self.compress_size - self.bytes_read
493
494 # adjust read size for encrypted files since the first 12 bytes
495 # are for the encryption/password information
496 if self.decrypter is not None:
497 bytesToRead -= 12
498
499 if size is not None and size >= 0:
500 if self.compress_type == ZIP_STORED:
501 lr = len(self.readbuffer)
502 bytesToRead = min(bytesToRead, size - lr)
503 elif self.compress_type == ZIP_DEFLATED:
504 if len(self.readbuffer) > size:
505 # the user has requested fewer bytes than we've already
506 # pulled through the decompressor; don't read any more
507 bytesToRead = 0
508 else:
509 # user will use up the buffer, so read some more
510 lr = len(self.rawbuffer)
511 bytesToRead = min(bytesToRead, self.compreadsize - lr)
512
513 # avoid reading past end of file contents
514 if bytesToRead + self.bytes_read > self.compress_size:
515 bytesToRead = self.compress_size - self.bytes_read
516
517 # try to read from file (if necessary)
518 if bytesToRead > 0:
519 bytes = self.fileobj.read(bytesToRead)
520 self.bytes_read += len(bytes)
521 self.rawbuffer += bytes
522
523 # handle contents of raw buffer
524 if self.rawbuffer:
525 newdata = self.rawbuffer
526 self.rawbuffer = ''
527
528 # decrypt new data if we were given an object to handle that
529 if newdata and self.decrypter is not None:
530 newdata = ''.join(map(self.decrypter, newdata))
531
532 # decompress newly read data if necessary
533 if newdata and self.compress_type == ZIP_DEFLATED:
534 newdata = self.dc.decompress(newdata)
535 self.rawbuffer = self.dc.unconsumed_tail
536 if self.eof and len(self.rawbuffer) == 0:
537 # we're out of raw bytes (both from the file and
538 # the local buffer); flush just to make sure the
539 # decompressor is done
540 newdata += self.dc.flush()
541 # prevent decompressor from being used again
542 self.dc = None
543
544 self.readbuffer += newdata
545
546
547 # return what the user asked for
548 if size is None or len(self.readbuffer) <= size:
549 bytes = self.readbuffer
550 self.readbuffer = ''
551 else:
552 bytes = self.readbuffer[:size]
553 self.readbuffer = self.readbuffer[size:]
554
555 return bytes
556
557
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000558class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000559 """ Class with methods to open, read, write, close, list zip files.
560
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000561 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +0000562
Fred Drake3d9091e2001-03-26 15:49:24 +0000563 file: Either the path to the file, or a file-like object.
564 If it is a path, the file will be opened and closed by ZipFile.
565 mode: The mode can be either read "r", write "w" or append "a".
566 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000567 allowZip64: if True ZipFile will create files with ZIP64 extensions when
568 needed, otherwise it will raise an exception when this would
569 be necessary.
570
Fred Drake3d9091e2001-03-26 15:49:24 +0000571 """
Fred Drake484d7352000-10-02 21:14:52 +0000572
Fred Drake90eac282001-02-28 05:29:34 +0000573 fp = None # Set here since __del__ checks it
574
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000575 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000576 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000577 self._allowZip64 = allowZip64
578 self._didModify = False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000579 if compression == ZIP_STORED:
580 pass
581 elif compression == ZIP_DEFLATED:
582 if not zlib:
583 raise RuntimeError,\
Fred Drake5db246d2000-09-29 20:44:48 +0000584 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000585 else:
586 raise RuntimeError, "That compression method is not supported"
Tim Peterse1190062001-01-15 03:34:38 +0000587 self.debug = 0 # Level of printing: 0 through 3
588 self.NameToInfo = {} # Find file info given name
589 self.filelist = [] # List of ZipInfo instances for archive
590 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000591 self.mode = key = mode.replace('b', '')[0]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000592 self.pwd = None
Tim Petersa19a1682001-03-29 04:36:09 +0000593
Fred Drake3d9091e2001-03-26 15:49:24 +0000594 # Check if we were passed a file-like object
Walter Dörwald65230a22002-06-03 15:58:32 +0000595 if isinstance(file, basestring):
Fred Drake3d9091e2001-03-26 15:49:24 +0000596 self._filePassed = 0
597 self.filename = file
598 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Thomas Wouterscf297e42007-02-23 15:07:44 +0000599 try:
600 self.fp = open(file, modeDict[mode])
601 except IOError:
602 if mode == 'a':
603 mode = key = 'w'
604 self.fp = open(file, modeDict[mode])
605 else:
606 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000607 else:
608 self._filePassed = 1
609 self.fp = file
610 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000611
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000612 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000613 self._GetContents()
614 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000615 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000616 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000617 try: # See if file is a zip file
618 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000619 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000620 self.fp.seek(self.start_dir, 0)
621 except BadZipfile: # file is not a zip file, just append
622 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000623 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000624 if not self._filePassed:
625 self.fp.close()
626 self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000627 raise RuntimeError, 'Mode must be "r", "w" or "a"'
628
629 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000630 """Read the directory, making sure we close the file if the format
631 is bad."""
632 try:
633 self._RealGetContents()
634 except BadZipfile:
635 if not self._filePassed:
636 self.fp.close()
637 self.fp = None
638 raise
639
640 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000641 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000642 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000643 endrec = _EndRecData(fp)
644 if not endrec:
645 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000646 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000647 print(endrec)
Tim Peterse1190062001-01-15 03:34:38 +0000648 size_cd = endrec[5] # bytes in central directory
649 offset_cd = endrec[6] # offset of central directory
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000650 self.comment = endrec[8] # archive comment
651 # endrec[9] is the offset of the "End of Central Dir" record
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000652 if endrec[9] > ZIP64_LIMIT:
653 x = endrec[9] - size_cd - 56 - 20
654 else:
655 x = endrec[9] - size_cd
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000656 # "concat" is zero, unless zip was concatenated to another file
657 concat = x - offset_cd
658 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000659 print("given, inferred, offset", offset_cd, x, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000660 # self.start_dir: Position of start of central directory
661 self.start_dir = offset_cd + concat
662 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000663 data = fp.read(size_cd)
664 fp = cStringIO.StringIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000665 total = 0
666 while total < size_cd:
667 centdir = fp.read(46)
668 total = total + 46
669 if centdir[0:4] != stringCentralDir:
670 raise BadZipfile, "Bad magic number for central directory"
671 centdir = struct.unpack(structCentralDir, centdir)
672 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000673 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +0000674 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000675 # Create ZipInfo instance to store file information
676 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000677 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
678 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
679 total = (total + centdir[_CD_FILENAME_LENGTH]
680 + centdir[_CD_EXTRA_FIELD_LENGTH]
681 + centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000682 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000683 (x.create_version, x.create_system, x.extract_version, x.reserved,
684 x.flag_bits, x.compress_type, t, d,
685 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
686 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
687 # Convert date/time code to (year, month, day, hour, min, sec)
688 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000689 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000690
691 x._decodeExtra()
692 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000693 self.filelist.append(x)
694 self.NameToInfo[x.filename] = x
695 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000696 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000697
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000698
699 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000700 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000701 l = []
702 for data in self.filelist:
703 l.append(data.filename)
704 return l
705
706 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000707 """Return a list of class ZipInfo instances for files in the
708 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000709 return self.filelist
710
711 def printdir(self):
Fred Drake484d7352000-10-02 21:14:52 +0000712 """Print a table of contents for the zip file."""
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000713 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"))
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000714 for zinfo in self.filelist:
715 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000716 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000717
718 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000719 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000720 for zinfo in self.filelist:
721 try:
Tim Peterse1190062001-01-15 03:34:38 +0000722 self.read(zinfo.filename) # Check CRC-32
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000723 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000724 return zinfo.filename
725
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000726
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000727 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000728 """Return the instance of ZipInfo given 'name'."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000729 return self.NameToInfo[name]
730
Thomas Wouterscf297e42007-02-23 15:07:44 +0000731 def setpassword(self, pwd):
732 """Set default password for encrypted files."""
733 self.pwd = pwd
734
735 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000736 """Return file bytes (as a string) for name."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000737 return self.open(name, "r", pwd).read()
738
739 def open(self, name, mode="r", pwd=None):
740 """Return file-like object for 'name'."""
741 if mode not in ("r", "U", "rU"):
742 raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000743 if not self.fp:
744 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000745 "Attempt to read ZIP archive that was already closed"
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000746
Guido van Rossumd8faa362007-04-27 19:54:29 +0000747 # Only open a new file for instances where we were not
748 # given a file object in the constructor
749 if self._filePassed:
750 zef_file = self.fp
751 else:
752 zef_file = open(self.filename, 'rb')
753
754 # Get info object for name
755 zinfo = self.getinfo(name)
756
757 filepos = zef_file.tell()
758
759 zef_file.seek(zinfo.header_offset, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000760
761 # Skip the file header:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000762 fheader = zef_file.read(30)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000763 if fheader[0:4] != stringFileHeader:
764 raise BadZipfile, "Bad magic number for file header"
765
766 fheader = struct.unpack(structFileHeader, fheader)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000767 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000768 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000769 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000770
771 if fname != zinfo.orig_filename:
772 raise BadZipfile, \
773 'File name in directory "%s" and header "%s" differ.' % (
774 zinfo.orig_filename, fname)
775
Guido van Rossumd8faa362007-04-27 19:54:29 +0000776 # check for encrypted flag & handle password
777 is_encrypted = zinfo.flag_bits & 0x1
778 zd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000779 if is_encrypted:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000780 if not pwd:
781 pwd = self.pwd
782 if not pwd:
783 raise RuntimeError, "File %s is encrypted, " \
784 "password required for extraction" % name
785
Thomas Wouterscf297e42007-02-23 15:07:44 +0000786 zd = _ZipDecrypter(pwd)
787 # The first 12 bytes in the cypher stream is an encryption header
788 # used to strengthen the algorithm. The first 11 bytes are
789 # completely random, while the 12th contains the MSB of the CRC,
790 # and is used to check the correctness of the password.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000791 bytes = zef_file.read(12)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000792 h = map(zd, bytes[0:12])
793 if ord(h[11]) != ((zinfo.CRC>>24)&255):
794 raise RuntimeError, "Bad password for file %s" % name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000795
796 # build and return a ZipExtFile
797 if zd is None:
798 zef = ZipExtFile(zef_file, zinfo)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000799 else:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000800 zef = ZipExtFile(zef_file, zinfo, zd)
801
802 # set universal newlines on ZipExtFile if necessary
803 if "U" in mode:
804 zef.set_univ_newlines(True)
805 return zef
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000806
807 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000808 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000809 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000810 if self.debug: # Warning for duplicate names
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000811 print("Duplicate name:", zinfo.filename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000812 if self.mode not in ("w", "a"):
813 raise RuntimeError, 'write() requires mode "w" or "a"'
814 if not self.fp:
815 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000816 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000817 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
818 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000819 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000820 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
821 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000822 "That compression method is not supported"
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000823 if zinfo.file_size > ZIP64_LIMIT:
824 if not self._allowZip64:
825 raise LargeZipFile("Filesize would require ZIP64 extensions")
826 if zinfo.header_offset > ZIP64_LIMIT:
827 if not self._allowZip64:
828 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000829
830 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +0000831 """Put the bytes from filename into the archive under the name
832 arcname."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000833 st = os.stat(filename)
Raymond Hettinger32200ae2002-06-01 19:51:15 +0000834 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000835 date_time = mtime[0:6]
836 # Create ZipInfo instance to store file information
837 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +0000838 arcname = filename
839 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
840 while arcname[0] in (os.sep, os.altsep):
841 arcname = arcname[1:]
842 zinfo = ZipInfo(arcname, date_time)
Guido van Rossume2a383d2007-01-15 16:59:06 +0000843 zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000844 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +0000845 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000846 else:
Tim Peterse1190062001-01-15 03:34:38 +0000847 zinfo.compress_type = compress_type
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000848
849 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +0000850 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +0000851 zinfo.header_offset = self.fp.tell() # Start of header bytes
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000852
853 self._writecheck(zinfo)
854 self._didModify = True
855 fp = open(filename, "rb")
Finn Bock03a3bb82001-09-05 18:40:33 +0000856 # Must overwrite CRC and sizes with correct data later
857 zinfo.CRC = CRC = 0
858 zinfo.compress_size = compress_size = 0
859 zinfo.file_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000860 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000861 if zinfo.compress_type == ZIP_DEFLATED:
862 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
863 zlib.DEFLATED, -15)
864 else:
865 cmpr = None
866 while 1:
867 buf = fp.read(1024 * 8)
868 if not buf:
869 break
870 file_size = file_size + len(buf)
871 CRC = binascii.crc32(buf, CRC)
872 if cmpr:
873 buf = cmpr.compress(buf)
874 compress_size = compress_size + len(buf)
875 self.fp.write(buf)
876 fp.close()
877 if cmpr:
878 buf = cmpr.flush()
879 compress_size = compress_size + len(buf)
880 self.fp.write(buf)
881 zinfo.compress_size = compress_size
882 else:
883 zinfo.compress_size = file_size
884 zinfo.CRC = CRC
885 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +0000886 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +0000887 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +0000888 self.fp.seek(zinfo.header_offset + 14, 0)
Brett Cannonff450f72004-07-10 19:09:20 +0000889 self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000890 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +0000891 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000892 self.filelist.append(zinfo)
893 self.NameToInfo[zinfo.filename] = zinfo
894
Just van Rossumb083cb32002-12-12 12:23:32 +0000895 def writestr(self, zinfo_or_arcname, bytes):
Fred Drake484d7352000-10-02 21:14:52 +0000896 """Write a file into the archive. The contents is the string
Just van Rossumb083cb32002-12-12 12:23:32 +0000897 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
898 the name of the file in the archive."""
899 if not isinstance(zinfo_or_arcname, ZipInfo):
900 zinfo = ZipInfo(filename=zinfo_or_arcname,
901 date_time=time.localtime(time.time()))
902 zinfo.compress_type = self.compression
903 else:
904 zinfo = zinfo_or_arcname
Tim Peterse1190062001-01-15 03:34:38 +0000905 zinfo.file_size = len(bytes) # Uncompressed size
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000906 zinfo.header_offset = self.fp.tell() # Start of header bytes
907 self._writecheck(zinfo)
908 self._didModify = True
Tim Peterse1190062001-01-15 03:34:38 +0000909 zinfo.CRC = binascii.crc32(bytes) # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000910 if zinfo.compress_type == ZIP_DEFLATED:
911 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
912 zlib.DEFLATED, -15)
913 bytes = co.compress(bytes) + co.flush()
Tim Peterse1190062001-01-15 03:34:38 +0000914 zinfo.compress_size = len(bytes) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000915 else:
916 zinfo.compress_size = zinfo.file_size
Tim Peterse1190062001-01-15 03:34:38 +0000917 zinfo.header_offset = self.fp.tell() # Start of header bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000918 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000919 self.fp.write(bytes)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000920 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000921 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +0000922 # Write CRC and file sizes after the file data
Brett Cannonff450f72004-07-10 19:09:20 +0000923 self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +0000924 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000925 self.filelist.append(zinfo)
926 self.NameToInfo[zinfo.filename] = zinfo
927
928 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +0000929 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +0000930 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000931
932 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +0000933 """Close the file, and for mode "w" and "a" write the ending
934 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +0000935 if self.fp is None:
936 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000937
938 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000939 count = 0
940 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +0000941 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000942 count = count + 1
943 dt = zinfo.date_time
944 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +0000945 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000946 extra = []
947 if zinfo.file_size > ZIP64_LIMIT \
948 or zinfo.compress_size > ZIP64_LIMIT:
949 extra.append(zinfo.file_size)
950 extra.append(zinfo.compress_size)
951 file_size = 0xffffffff #-1
952 compress_size = 0xffffffff #-1
953 else:
954 file_size = zinfo.file_size
955 compress_size = zinfo.compress_size
956
957 if zinfo.header_offset > ZIP64_LIMIT:
958 extra.append(zinfo.header_offset)
959 header_offset = -1 # struct "l" format: 32 one bits
960 else:
961 header_offset = zinfo.header_offset
962
963 extra_data = zinfo.extra
964 if extra:
965 # Append a ZIP64 field to the extra's
966 extra_data = struct.pack(
967 '<hh' + 'q'*len(extra),
968 1, 8*len(extra), *extra) + extra_data
969
970 extract_version = max(45, zinfo.extract_version)
971 create_version = max(45, zinfo.create_version)
972 else:
973 extract_version = zinfo.extract_version
974 create_version = zinfo.create_version
975
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000976 centdir = struct.pack(structCentralDir,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000977 stringCentralDir, create_version,
978 zinfo.create_system, extract_version, zinfo.reserved,
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000979 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000980 zinfo.CRC, compress_size, file_size,
981 len(zinfo.filename), len(extra_data), len(zinfo.comment),
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000982 0, zinfo.internal_attr, zinfo.external_attr,
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000983 header_offset)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000984 self.fp.write(centdir)
985 self.fp.write(zinfo.filename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000986 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000987 self.fp.write(zinfo.comment)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000988
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000989 pos2 = self.fp.tell()
990 # Write end-of-zip-archive record
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000991 if pos1 > ZIP64_LIMIT:
992 # Need to write the ZIP64 end-of-archive records
993 zip64endrec = struct.pack(
994 structEndArchive64, stringEndArchive64,
995 44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
996 self.fp.write(zip64endrec)
997
998 zip64locrec = struct.pack(
999 structEndArchive64Locator,
1000 stringEndArchive64Locator, 0, pos2, 1)
1001 self.fp.write(zip64locrec)
1002
1003 # XXX Why is `pos3` computed next? It's never referenced.
1004 pos3 = self.fp.tell()
1005 endrec = struct.pack(structEndArchive, stringEndArchive,
1006 0, 0, count, count, pos2 - pos1, -1, 0)
1007 self.fp.write(endrec)
1008
1009 else:
1010 endrec = struct.pack(structEndArchive, stringEndArchive,
1011 0, 0, count, count, pos2 - pos1, pos1, 0)
1012 self.fp.write(endrec)
Guido van Rossumf85af612001-04-14 16:45:14 +00001013 self.fp.flush()
Fred Drake3d9091e2001-03-26 15:49:24 +00001014 if not self._filePassed:
1015 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001016 self.fp = None
1017
1018
1019class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001020 """Class to create ZIP archives with Python library files and packages."""
1021
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001022 def writepy(self, pathname, basename = ""):
1023 """Add all files from "pathname" to the ZIP archive.
1024
Fred Drake484d7352000-10-02 21:14:52 +00001025 If pathname is a package directory, search the directory and
1026 all package subdirectories recursively for all *.py and enter
1027 the modules into the archive. If pathname is a plain
1028 directory, listdir *.py and enter all modules. Else, pathname
1029 must be a Python *.py file and the module will be put into the
1030 archive. Added modules are always module.pyo or module.pyc.
1031 This method will compile the module.py into module.pyc if
1032 necessary.
1033 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001034 dir, name = os.path.split(pathname)
1035 if os.path.isdir(pathname):
1036 initname = os.path.join(pathname, "__init__.py")
1037 if os.path.isfile(initname):
1038 # This is a package directory, add it
1039 if basename:
1040 basename = "%s/%s" % (basename, name)
1041 else:
1042 basename = name
1043 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001044 print("Adding package in", pathname, "as", basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001045 fname, arcname = self._get_codename(initname[0:-3], basename)
1046 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001047 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001048 self.write(fname, arcname)
1049 dirlist = os.listdir(pathname)
1050 dirlist.remove("__init__.py")
1051 # Add all *.py files and package subdirectories
1052 for filename in dirlist:
1053 path = os.path.join(pathname, filename)
1054 root, ext = os.path.splitext(filename)
1055 if os.path.isdir(path):
1056 if os.path.isfile(os.path.join(path, "__init__.py")):
1057 # This is a package directory, add it
1058 self.writepy(path, basename) # Recursive call
1059 elif ext == ".py":
1060 fname, arcname = self._get_codename(path[0:-3],
1061 basename)
1062 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001063 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001064 self.write(fname, arcname)
1065 else:
1066 # This is NOT a package directory, add its files at top level
1067 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001068 print("Adding files from directory", pathname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001069 for filename in os.listdir(pathname):
1070 path = os.path.join(pathname, filename)
1071 root, ext = os.path.splitext(filename)
1072 if ext == ".py":
1073 fname, arcname = self._get_codename(path[0:-3],
1074 basename)
1075 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001076 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001077 self.write(fname, arcname)
1078 else:
1079 if pathname[-3:] != ".py":
1080 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001081 'Files added with writepy() must end with ".py"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001082 fname, arcname = self._get_codename(pathname[0:-3], basename)
1083 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001084 print("Adding file", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001085 self.write(fname, arcname)
1086
1087 def _get_codename(self, pathname, basename):
1088 """Return (filename, archivename) for the path.
1089
Fred Drake484d7352000-10-02 21:14:52 +00001090 Given a module name path, return the correct file path and
1091 archive name, compiling if necessary. For example, given
1092 /python/lib/string, return (/python/lib/string.pyc, string).
1093 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001094 file_py = pathname + ".py"
1095 file_pyc = pathname + ".pyc"
1096 file_pyo = pathname + ".pyo"
1097 if os.path.isfile(file_pyo) and \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001098 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
Tim Peterse1190062001-01-15 03:34:38 +00001099 fname = file_pyo # Use .pyo file
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001100 elif not os.path.isfile(file_pyc) or \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001101 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
Fred Drake484d7352000-10-02 21:14:52 +00001102 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001103 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001104 print("Compiling", file_py)
Martin v. Löwis0c6774d2003-01-15 11:51:06 +00001105 try:
1106 py_compile.compile(file_py, file_pyc, None, True)
Guido van Rossumb940e112007-01-10 16:19:56 +00001107 except py_compile.PyCompileError as err:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001108 print(err.msg)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001109 fname = file_pyc
1110 else:
1111 fname = file_pyc
1112 archivename = os.path.split(fname)[1]
1113 if basename:
1114 archivename = "%s/%s" % (basename, archivename)
1115 return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001116
1117
1118def main(args = None):
1119 import textwrap
1120 USAGE=textwrap.dedent("""\
1121 Usage:
1122 zipfile.py -l zipfile.zip # Show listing of a zipfile
1123 zipfile.py -t zipfile.zip # Test if a zipfile is valid
1124 zipfile.py -e zipfile.zip target # Extract zipfile into target dir
1125 zipfile.py -c zipfile.zip src ... # Create zipfile from sources
1126 """)
1127 if args is None:
1128 args = sys.argv[1:]
1129
1130 if not args or args[0] not in ('-l', '-c', '-e', '-t'):
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001131 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001132 sys.exit(1)
1133
1134 if args[0] == '-l':
1135 if len(args) != 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001136 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001137 sys.exit(1)
1138 zf = ZipFile(args[1], 'r')
1139 zf.printdir()
1140 zf.close()
1141
1142 elif args[0] == '-t':
1143 if len(args) != 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001144 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001145 sys.exit(1)
1146 zf = ZipFile(args[1], 'r')
1147 zf.testzip()
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001148 print("Done testing")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001149
1150 elif args[0] == '-e':
1151 if len(args) != 3:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001152 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001153 sys.exit(1)
1154
1155 zf = ZipFile(args[1], 'r')
1156 out = args[2]
1157 for path in zf.namelist():
1158 if path.startswith('./'):
1159 tgt = os.path.join(out, path[2:])
1160 else:
1161 tgt = os.path.join(out, path)
1162
1163 tgtdir = os.path.dirname(tgt)
1164 if not os.path.exists(tgtdir):
1165 os.makedirs(tgtdir)
1166 fp = open(tgt, 'wb')
1167 fp.write(zf.read(path))
1168 fp.close()
1169 zf.close()
1170
1171 elif args[0] == '-c':
1172 if len(args) < 3:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001173 print(USAGE)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001174 sys.exit(1)
1175
1176 def addToZip(zf, path, zippath):
1177 if os.path.isfile(path):
1178 zf.write(path, zippath, ZIP_DEFLATED)
1179 elif os.path.isdir(path):
1180 for nm in os.listdir(path):
1181 addToZip(zf,
1182 os.path.join(path, nm), os.path.join(zippath, nm))
1183 # else: ignore
1184
1185 zf = ZipFile(args[1], 'w', allowZip64=True)
1186 for src in args[2:]:
1187 addToZip(zf, src, os.path.basename(src))
1188
1189 zf.close()
1190
1191if __name__ == "__main__":
1192 main()