blob: ab9c93fed5c909539f00dd64cec7e8f834459daf [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Ronald Oussoren143cefb2006-06-15 08:14:18 +00005import binascii, cStringIO
Guido van Rossum32abe6f2000-03-31 17:30:02 +00006
7try:
Tim Peterse1190062001-01-15 03:34:38 +00008 import zlib # We may need its compression method
Guido van Rossum9c673f32001-04-10 15:37:12 +00009except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000010 zlib = None
11
Skip Montanaro40fc1602001-03-01 04:27:19 +000012__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Ronald Oussoren143cefb2006-06-15 08:14:18 +000013 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000014
class BadZipfile(Exception):
    """Raised by this module when a file is not a usable ZIP archive."""
Ronald Oussoren143cefb2006-06-15 08:14:18 +000017
18
class LargeZipFile(Exception):
    """Signals that the zipfile being written needs the ZIP64 extensions
    while those extensions are disabled.
    """
24
# "error" is a second name for the BadZipfile class.
error = BadZipfile      # The exception raised by this module

# Sizes above this value make ZipInfo.FileHeader() switch to the ZIP64
# extra record; _RealGetContents() also compares the end-record offset
# against it.
ZIP64_LIMIT= (1 << 31) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers.
# All formats are little-endian ("<") with standard sizes.
# The end-of-archive format unpacks 8 items; _EndRecData() then appends the
# archive comment and the record's file offset as items 8 and 9.
structEndArchive = "<4s4H2lH"     # 8 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"   # magic number for end of archive record
structCentralDir = "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"   # magic number for central directory
structFileHeader = "<4s2B4HlLL2H"  # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"   # magic number for file header
structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes
stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes
stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header
45
Guido van Rossum32abe6f2000-03-31 17:30:02 +000046
# indexes of entries in the central directory structure
# (positions in the tuple returned by struct.unpack(structCentralDir, ...))
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4                  # is this meaningful?
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# indexes of entries in the local file header structure
# (positions in the tuple returned by struct.unpack(structFileHeader, ...))
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2                  # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
81
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Returns True when _EndRecData() finds an "End of Central Directory"
    record in the file named *filename*, otherwise False.  An IOError
    while opening or reading the file is deliberately swallowed and
    reported as False.
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Release the handle even when _EndRecData() raises; the
            # previous code leaked it on any exception.
            fpin.close()
        if endrec:
            return True                 # file has correct magic number
    except IOError:
        pass
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +000093
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin   -- open file object positioned anywhere.
    offset -- position of the classic end-of-archive record, expressed
              relative to the end of the file (a negative number).
    endrec -- list built by _EndRecData(); items 1..6 are overwritten in
              place with the ZIP64 values when a ZIP64 record is found.

    Returns endrec (updated or untouched).  Raises BadZipfile for archives
    that span multiple disks.
    """
    locatorSize = struct.calcsize(structEndArchive64Locator)
    try:
        fpin.seek(offset - locatorSize, 2)
    except IOError:
        # The file is too small to hold a ZIP64 locator in front of the
        # end-of-archive record, so there is nothing to merge in.
        return endrec
    data = fpin.read(locatorSize)
    if len(data) != locatorSize:
        # Short read: treat as "no locator" instead of letting
        # struct.unpack fail on a truncated buffer.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    endArchiveSize = struct.calcsize(structEndArchive64)
    fpin.seek(offset - locatorSize - endArchiveSize, 2)
    data = fpin.read(endArchiveSize)
    if len(data) != endArchiveSize:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[1] = disk_num
    endrec[2] = disk_dir
    endrec[3] = dircount
    endrec[4] = dircount2
    endrec[5] = dirsize
    endrec[6] = diroffset
    return endrec
126
127
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list: the eight fields unpacked from the record,
    followed by the archive comment (item 8) and the file offset at which
    the record starts (item 9).  When the central-directory offset field
    holds the ZIP64 marker value the list is passed through
    _EndRecData64() before being returned.

    None is returned when no valid record is found, including for files
    too short to contain one.
    """
    recSize = struct.calcsize(structEndArchive)

    # Determine the file size first so that files shorter than one record
    # are rejected cleanly instead of failing on a negative seek.
    fpin.seek(0, 2)
    filesize = fpin.tell()
    if filesize < recSize:
        return None

    fpin.seek(-recSize, 2)              # Assume no archive comment.
    data = fpin.read()
    if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
        endrec = struct.unpack(structEndArchive, data)
        endrec = list(endrec)
        endrec.append("")               # Append the archive comment
        endrec.append(filesize - recSize)   # Append the record start offset
        if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
            return _EndRecData64(fpin, -recSize, endrec)
        return endrec

    # The comment is appended to the ZIP file and has a 16 bit length, so
    # the record can start at most 64K + recSize bytes before the end of
    # the file.  Search that whole window; rfind() picks the last
    # signature, and the comment-length check below rejects false hits.
    END_BLOCK = min(filesize, (1 << 16) + recSize)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:                      # Correct signature string was found
        recData = data[start:start+recSize]
        if len(recData) != recSize:
            # Signature too close to the end of the file to be a record.
            return None
        endrec = struct.unpack(structEndArchive, recData)
        endrec = list(endrec)
        comment = data[start+recSize:]
        if endrec[7] == len(comment):   # Comment length checks out
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(filesize - END_BLOCK + start)
            if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
                return _EndRecData64(fpin, - END_BLOCK + start, endrec)
            return endrec
    return None                         # Error, return None
165
Fred Drake484d7352000-10-02 21:14:52 +0000166
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry description with default attribute values.

        filename  -- member name; kept verbatim in orig_filename and stored
                     normalized (see below) in filename.
        date_time -- (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra record is
        appended to the returned header and, as a side effect,
        extract_version and create_version are raised to at least 45.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<hhqq'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff # -1
            compress_size = 0xffffffff # -1
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, self.flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(self.filename), len(extra))
        return header + self.filename + extra

    def _decodeExtra(self):
        """Decode the ZIP64 record (type 1) found in self.extra, if any.

        Each of file_size, compress_size and header_offset that currently
        holds the placeholder value (-1 or 0xFFFFFFFF) is replaced, in that
        order, by the next 64-bit value of the record.  Records of other
        types are skipped.  Raises RuntimeError when a type-1 record has
        an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header; stop as
        # soon as less than a full header remains.
        while len(extra) >= 4:
            # Both fields are unsigned.  Reading the length as signed let a
            # value such as 0xFFFC become -4, so the slice at the bottom of
            # the loop never advanced and the loop never terminated.
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size == -1 or self.file_size == 0xFFFFFFFF:
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == -1 or self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == -1 or self.header_offset == 0xFFFFFFFF:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
296 extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000297
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000298
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000299class _ZipDecrypter:
300 """Class to handle decryption of files stored within a ZIP archive.
301
302 ZIP supports a password-based form of encryption. Even though known
303 plaintext attacks have been found against it, it is still useful
304 for low-level securicy.
305
306 Usage:
307 zd = _ZipDecrypter(mypwd)
308 plain_char = zd(cypher_char)
309 plain_text = map(zd, cypher_text)
310 """
311
312 def _GenerateCRCTable():
313 """Generate a CRC-32 table.
314
315 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
316 internal keys. We noticed that a direct implementation is faster than
317 relying on binascii.crc32().
318 """
319 poly = 0xedb88320
320 table = [0] * 256
321 for i in range(256):
322 crc = i
323 for j in range(8):
324 if crc & 1:
325 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
326 else:
327 crc = ((crc >> 1) & 0x7FFFFFFF)
328 table[i] = crc
329 return table
330 crctable = _GenerateCRCTable()
331
332 def _crc32(self, ch, crc):
333 """Compute the CRC32 primitive on one byte."""
334 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
335
336 def __init__(self, pwd):
337 self.key0 = 305419896
338 self.key1 = 591751049
339 self.key2 = 878082192
340 for p in pwd:
341 self._UpdateKeys(p)
342
343 def _UpdateKeys(self, c):
344 self.key0 = self._crc32(c, self.key0)
345 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
346 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
347 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
348
349 def __call__(self, c):
350 """Decrypt a single character."""
351 c = ord(c)
352 k = self.key2 | 2
353 c = c ^ (((k * (k^1)) >> 8) & 255)
354 c = chr(c)
355 self._UpdateKeys(c)
356 return c
357
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap fileobj for reading the member described by zipinfo.

        fileobj -- file object read() pulls the raw member bytes from.
        zipinfo -- supplies compress_type, compress_size and filename.
        decrypt -- optional callable applied to every raw character.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = ''         # raw bytes not yet decompressed
        self.readbuffer = ''        # decoded bytes not yet returned by read()
        self.linebuffer = ''        # bytes read() returned, pending readline()
        # NOTE(review): eof is never set to True inside this class, so the
        # decompressor flush in read() only runs if outside code sets it.
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = ("\n", )
        self.lastdiscard = ''       # separator removed by the last readline()

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal-newline handling for readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = ("\n", )
        if self.univ_newlines:
            self.nlSeps = ("\r\n", "\r", "\n")

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of data."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark this object as closed (the underlying file is not touched)."""
        self.closed = True

    def _checkfornewline(self):
        """Return (index, length) of the first separator found in linebuffer.

        Separators are tried in the order given by self.nlSeps; (-1, -1)
        is returned when none is present.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.
        """
        if size < 0:
            size = sys.maxint
        elif size == 0:
            return ''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            nl = min(nl, size)
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = ''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + "\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return at most size bytes of member data.

        With size None or negative, everything currently available is
        returned: the remaining raw bytes are read from the file and
        run through the decrypter / decompressor.  An empty string is
        returned when size is 0.
        """
        # act like file() obj and return empty string if size is 0
        if size == 0:
            return ''

        # A negative size means "no limit", like file.read(-1).  Without
        # this, the final slice readbuffer[:size] silently dropped bytes
        # from the end of the data.
        if size is not None and size < 0:
            size = None

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            bytes = self.fileobj.read(bytesToRead)
            self.bytes_read += len(bytes)
            self.rawbuffer += bytes

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = ''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = ''.join(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type == ZIP_DEFLATED:
                    newdata = self.dc.decompress(newdata)
                    self.rawbuffer = self.dc.unconsumed_tail
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self.readbuffer += newdata


        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            bytes = self.readbuffer
            self.readbuffer = ''
        else:
            bytes = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return bytes
Tim Petersea5962f2007-03-12 18:07:52 +0000550
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000551
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000552class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000553 """ Class with methods to open, read, write, close, list zip files.
554
    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000556
Fred Drake3d9091e2001-03-26 15:49:24 +0000557 file: Either the path to the file, or a file-like object.
558 If it is a path, the file will be opened and closed by ZipFile.
559 mode: The mode can be either read "r", write "w" or append "a".
560 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000561 allowZip64: if True ZipFile will create files with ZIP64 extensions when
562 needed, otherwise it will raise an exception when this would
563 be necessary.
564
Fred Drake3d9091e2001-03-26 15:49:24 +0000565 """
Fred Drake484d7352000-10-02 21:14:52 +0000566
Fred Drake90eac282001-02-28 05:29:34 +0000567 fp = None # Set here since __del__ checks it
568
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000569 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000570 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000571 if mode not in ("r", "w", "a"):
572 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
573
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000574 if compression == ZIP_STORED:
575 pass
576 elif compression == ZIP_DEFLATED:
577 if not zlib:
578 raise RuntimeError,\
Fred Drake5db246d2000-09-29 20:44:48 +0000579 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000580 else:
581 raise RuntimeError, "That compression method is not supported"
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000582
583 self._allowZip64 = allowZip64
584 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000585 self.debug = 0 # Level of printing: 0 through 3
586 self.NameToInfo = {} # Find file info given name
587 self.filelist = [] # List of ZipInfo instances for archive
588 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000589 self.mode = key = mode.replace('b', '')[0]
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000590 self.pwd = None
Tim Petersa19a1682001-03-29 04:36:09 +0000591
Fred Drake3d9091e2001-03-26 15:49:24 +0000592 # Check if we were passed a file-like object
Walter Dörwald65230a22002-06-03 15:58:32 +0000593 if isinstance(file, basestring):
Fred Drake3d9091e2001-03-26 15:49:24 +0000594 self._filePassed = 0
595 self.filename = file
596 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Martin v. Löwis84f6de92007-02-13 10:10:39 +0000597 try:
598 self.fp = open(file, modeDict[mode])
599 except IOError:
600 if mode == 'a':
601 mode = key = 'w'
602 self.fp = open(file, modeDict[mode])
603 else:
604 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000605 else:
606 self._filePassed = 1
607 self.fp = file
608 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000609
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000610 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000611 self._GetContents()
612 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000613 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000614 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000615 try: # See if file is a zip file
616 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000617 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000618 self.fp.seek(self.start_dir, 0)
619 except BadZipfile: # file is not a zip file, just append
620 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000621 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000622 if not self._filePassed:
623 self.fp.close()
624 self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000625 raise RuntimeError, 'Mode must be "r", "w" or "a"'
626
627 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000628 """Read the directory, making sure we close the file if the format
629 is bad."""
630 try:
631 self._RealGetContents()
632 except BadZipfile:
633 if not self._filePassed:
634 self.fp.close()
635 self.fp = None
636 raise
637
638 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000639 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000640 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000641 endrec = _EndRecData(fp)
642 if not endrec:
643 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000644 if self.debug > 1:
645 print endrec
Tim Peterse1190062001-01-15 03:34:38 +0000646 size_cd = endrec[5] # bytes in central directory
647 offset_cd = endrec[6] # offset of central directory
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000648 self.comment = endrec[8] # archive comment
649 # endrec[9] is the offset of the "End of Central Dir" record
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000650 if endrec[9] > ZIP64_LIMIT:
651 x = endrec[9] - size_cd - 56 - 20
652 else:
653 x = endrec[9] - size_cd
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000654 # "concat" is zero, unless zip was concatenated to another file
655 concat = x - offset_cd
656 if self.debug > 2:
657 print "given, inferred, offset", offset_cd, x, concat
658 # self.start_dir: Position of start of central directory
659 self.start_dir = offset_cd + concat
660 fp.seek(self.start_dir, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000661 data = fp.read(size_cd)
662 fp = cStringIO.StringIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000663 total = 0
664 while total < size_cd:
665 centdir = fp.read(46)
666 total = total + 46
667 if centdir[0:4] != stringCentralDir:
668 raise BadZipfile, "Bad magic number for central directory"
669 centdir = struct.unpack(structCentralDir, centdir)
670 if self.debug > 2:
671 print centdir
Fred Drake3e038e52001-02-28 17:56:26 +0000672 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000673 # Create ZipInfo instance to store file information
674 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000675 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
676 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
677 total = (total + centdir[_CD_FILENAME_LENGTH]
678 + centdir[_CD_EXTRA_FIELD_LENGTH]
679 + centdir[_CD_COMMENT_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000680 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000681 (x.create_version, x.create_system, x.extract_version, x.reserved,
682 x.flag_bits, x.compress_type, t, d,
683 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
684 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
685 # Convert date/time code to (year, month, day, hour, min, sec)
686 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000687 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000688
689 x._decodeExtra()
690 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000691 self.filelist.append(x)
692 self.NameToInfo[x.filename] = x
693 if self.debug > 2:
694 print "total", total
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000695
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000696
697 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000698 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000699 l = []
700 for data in self.filelist:
701 l.append(data.filename)
702 return l
703
704 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000705 """Return a list of class ZipInfo instances for files in the
706 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000707 return self.filelist
708
709 def printdir(self):
Fred Drake484d7352000-10-02 21:14:52 +0000710 """Print a table of contents for the zip file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000711 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
712 for zinfo in self.filelist:
713 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
714 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
715
716 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000717 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000718 for zinfo in self.filelist:
719 try:
Tim Peterse1190062001-01-15 03:34:38 +0000720 self.read(zinfo.filename) # Check CRC-32
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000721 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000722 return zinfo.filename
723
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000724
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000725 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000726 """Return the instance of ZipInfo given 'name'."""
Georg Brandl4b3ab6f2007-07-12 09:59:22 +0000727 info = self.NameToInfo.get(name)
728 if info is None:
729 raise KeyError(
730 'There is no item named %r in the archive' % name)
731
732 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000733
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000734 def setpassword(self, pwd):
735 """Set default password for encrypted files."""
736 self.pwd = pwd
737
def read(self, name, pwd=None):
    """Return the complete contents of member 'name' as a string of bytes.

    The member is opened in mode "r" via open(); 'pwd' is handed through
    unchanged.
    """
    member_file = self.open(name, "r", pwd)
    return member_file.read()
741
def open(self, name, mode="r", pwd=None):
    """Return a file-like object for reading the archive member 'name'.

    'mode' must be "r", "U" or "rU"; a mode containing "U" switches on
    universal newline handling on the returned object.  'pwd' is the
    password for an encrypted member; when it is empty the default set
    with setpassword() is used.

    Raises RuntimeError for an unsupported mode, an already closed
    archive, or a missing/incorrect password; KeyError (from getinfo)
    when 'name' is not in the archive; BadZipfile when the local file
    header does not match the central directory entry.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Look the member up before opening anything, so an unknown name
    # cannot leave a freshly opened file handle behind.
    zinfo = self.getinfo(name)

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    owns_file = not self._filePassed
    if owns_file:
        zef_file = open(self.filename, 'rb')
    else:
        zef_file = self.fp

    try:
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(30)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        zd = None
        if zinfo.flag_bits & 0x1:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption
            # header used to strengthen the algorithm. The first 11 bytes
            # are completely random, while the 12th contains the MSB of
            # the CRC, and is used to check the correctness of the
            # password.
            enc_header = zef_file.read(12)
            h = [zd(c) for c in enc_header[0:12]]
            if ord(h[11]) != ((zinfo.CRC>>24)&255):
                raise RuntimeError("Bad password for file %s" % name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
        return zef
    except:
        # Cleanup-and-reraise: a handle we opened ourselves must not
        # outlive a failed open(); a caller-supplied file is left alone.
        if owns_file:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000809
def extract(self, member, path=None, pwd=None):
    """Extract one member from the archive, using its full name, and
    return the value of _extract_member() for it.

    `member' may be a file name or a ZipInfo object.  The member is
    extracted below `path', which defaults to the current working
    directory.  `pwd' is passed on for reading the member.
    """
    if isinstance(member, ZipInfo):
        zinfo = member
    else:
        zinfo = self.getinfo(member)

    destination = path
    if destination is None:
        destination = os.getcwd()

    return self._extract_member(zinfo, destination, pwd)
823
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members from the archive by calling extract() on
    each of them in turn.

    `path' is the directory to extract to (extract() substitutes the
    current working directory for None).  `members' is optional and must
    be a subset of the list returned by namelist(); when omitted, every
    member is extracted.  `pwd' is passed on to extract().
    """
    selected = members
    if selected is None:
        selected = self.namelist()

    for member in selected:
        self.extract(member, path, pwd)
835
836 def _extract_member(self, member, targetpath, pwd):
837 """Extract the ZipInfo object 'member' to a physical
838 file on the path targetpath.
839 """
840 # build the destination pathname, replacing
841 # forward slashes to platform specific separators.
842 if targetpath[-1:] == "/":
843 targetpath = targetpath[:-1]
844
845 # don't include leading "/" from file name if present
846 if os.path.isabs(member.filename):
847 targetpath = os.path.join(targetpath, member.filename[1:])
848 else:
849 targetpath = os.path.join(targetpath, member.filename)
850
851 targetpath = os.path.normpath(targetpath)
852
853 # Create all upper directories if necessary.
854 upperdirs = os.path.dirname(targetpath)
855 if upperdirs and not os.path.exists(upperdirs):
856 os.makedirs(upperdirs)
857
858 source = self.open(member.filename, pwd=pwd)
859 target = file(targetpath, "wb")
860 shutil.copyfileobj(source, target)
861 source.close()
862 target.close()
863
864 return targetpath
865
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000866 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000867 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000868 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000869 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000870 print "Duplicate name:", zinfo.filename
871 if self.mode not in ("w", "a"):
872 raise RuntimeError, 'write() requires mode "w" or "a"'
873 if not self.fp:
874 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000875 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000876 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
877 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000878 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000879 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
880 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000881 "That compression method is not supported"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000882 if zinfo.file_size > ZIP64_LIMIT:
883 if not self._allowZip64:
884 raise LargeZipFile("Filesize would require ZIP64 extensions")
885 if zinfo.header_offset > ZIP64_LIMIT:
886 if not self._allowZip64:
887 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000888
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from the file 'filename' into the archive under the
    name 'arcname'.

    'arcname' defaults to 'filename'; any drive letter and leading path
    separators are removed from it.  'compress_type' overrides the
    archive's default compression for this member.

    Raises RuntimeError when the archive is already closed, plus whatever
    _writecheck() raises for an unwritable archive or oversized member.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16    # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    # The real size is only needed for the ZIP64 check in _writecheck();
    # it is reset below and recomputed while the data is copied.
    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True
    fp = open(filename, "rb")
    try:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
        else:
            cmpr = None
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = binascii.crc32(buf, CRC)
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    finally:
        # Release the source file even if reading, compressing or
        # writing to the archive failed part-way through.
        fp.close()
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
          zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
957
def writestr(self, zinfo_or_arcname, bytes):
    """Write the string 'bytes' into the archive as one member.

    'zinfo_or_arcname' is either a ZipInfo instance describing the member
    or the name the member gets in the archive; in the latter case a
    ZipInfo with the current local time and the archive's default
    compression is created.  Raises RuntimeError when the archive is
    already closed.
    """
    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time()))
        zinfo.compress_type = self.compression

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    payload = bytes
    zinfo.file_size = len(payload)              # Uncompressed size
    zinfo.header_offset = self.fp.tell()        # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = binascii.crc32(payload)         # CRC-32 checksum
    if zinfo.compress_type != ZIP_DEFLATED:
        zinfo.compress_size = zinfo.file_size
    else:
        deflater = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        payload = deflater.compress(payload) + deflater.flush()
        zinfo.compress_size = len(payload)      # Compressed size
    zinfo.header_offset = self.fp.tell()        # Start of header bytes

    out = self.fp
    out.write(zinfo.FileHeader())
    out.write(payload)
    out.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        out.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
995
996 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +0000997 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +0000998 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000999
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing when the archive is already closed.  When the archive
    was opened for writing and has been modified, the central directory
    and the end-of-archive record(s) are written first (including the
    ZIP64 records when the central directory starts beyond ZIP64_LIMIT).
    The underlying file is closed unless it was supplied by the caller,
    and self.fp is reset to None -- also when writing the records fails,
    so that a later close() or __del__ does not retry on a broken file.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                # Values that do not fit the 32-bit fields are collected
                # here and stored in a ZIP64 extra field instead.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff #-1
                    compress_size = 0xffffffff #-1
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = -1  # struct "l" format:  32 one bits
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                            '<hh' + 'q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                centdir = struct.pack(structCentralDir,
                  stringCentralDir, create_version,
                  zinfo.create_system, extract_version, zinfo.reserved,
                  zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                  zinfo.CRC, compress_size, file_size,
                  len(zinfo.filename), len(extra_data), len(zinfo.comment),
                  0, zinfo.internal_attr, zinfo.external_attr,
                  header_offset)
                self.fp.write(centdir)
                self.fp.write(zinfo.filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            if pos1 > ZIP64_LIMIT:
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)

                # The classic record carries -1 as directory offset; the
                # real offset is in the ZIP64 record written above.
                endrec = struct.pack(structEndArchive, stringEndArchive,
                            0, 0, count, count, pos2 - pos1, -1, 0)
                self.fp.write(endrec)

            else:
                endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, count, count, pos2 - pos1, pos1, 0)
                self.fp.write(endrec)
            self.fp.flush()
    finally:
        # Mark the archive closed first so the state is consistent even
        # if closing the underlying file raises.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1085
1086
class PyZipFile(ZipFile):
    """ZipFile variant for building archives of compiled Python library
    files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        Three cases are handled:

        * "pathname" is a package directory (it contains __init__.py):
          the package and, recursively, all its sub-packages are added,
          taking every *.py module found.
        * "pathname" is a plain directory: every *.py module directly
          inside it is added at the top level.
        * otherwise "pathname" must be a Python *.py file, which is added
          as a single module; anything else raises RuntimeError.

        Added modules are always module.pyo or module.pyc.  This method
        will compile the module.py into module.pyc if necessary.
        "basename" is the archive directory prefix for the added entries.
        """
        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file %s" % arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % pathname)
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] != ".py":
                    continue
                path = os.path.join(pathname, filename)
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding %s" % arcname)
                self.write(fname, arcname)
            return

        # This is a package directory, add it
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in %s as %s" % (pathname, basename))
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding %s" % arcname)
        self.write(fname, arcname)

        # __init__.py has just been added; handle the remaining entries.
        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in entries:
            path = os.path.join(pathname, filename)
            ext = os.path.splitext(filename)[1]
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif ext == ".py":
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding %s" % arcname)
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path (without extension), return the path of
        the compiled file to store and its name inside the archive,
        compiling if necessary.  For example, given /python/lib/string,
        return (/python/lib/string.pyc, string).  An up-to-date .pyo is
        preferred over the .pyc; a missing or outdated .pyc is rebuilt
        from the .py source first.
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"

        # Default choice; only an up-to-date .pyo overrides it.
        fname = file_pyc
        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        elif not os.path.isfile(file_pyc) or \
                os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
            import py_compile
            if self.debug:
                print("Compiling %s" % file_py)
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError:
                # Report the compile error and carry on with the .pyc name.
                err = sys.exc_info()[1]
                print(err.msg)

        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001184
1185
def main(args = None):
    """Command-line interface of the module.

    'args' is the argument list without the program name and defaults to
    sys.argv[1:].  Supported invocations are listed in the usage text
    below; an unknown option or a wrong number of arguments prints the
    usage text and exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        zf.printdir()
        zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        # testzip() returns the name of the first bad member (or None);
        # report it instead of silently discarding the result.
        badfile = zf.testzip()
        zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % badfile)
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        out = args[2]
        for path in zf.namelist():
            if path.startswith('./'):
                tgt = os.path.join(out, path[2:])
            else:
                tgt = os.path.join(out, path)

            # dirname is empty when tgt has no directory part;
            # makedirs('') would fail in that case.
            tgtdir = os.path.dirname(tgt)
            if tgtdir and not os.path.exists(tgtdir):
                os.makedirs(tgtdir)
            fp = open(tgt, 'wb')
            try:
                fp.write(zf.read(path))
            finally:
                fp.close()
        zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; other kinds of
            # file system entries are ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        for src in args[2:]:
            addToZip(zf, src, os.path.basename(src))

        zf.close()

if __name__ == "__main__":
    main()