"""
Read and write ZIP files.
"""
Martin v. Löwis00756902006-02-05 17:09:41 +00004import struct, os, time, sys
Ronald Oussoren143cefb2006-06-15 08:14:18 +00005import binascii, cStringIO
Guido van Rossum32abe6f2000-03-31 17:30:02 +00006
7try:
Tim Peterse1190062001-01-15 03:34:38 +00008 import zlib # We may need its compression method
Guido van Rossum9c673f32001-04-10 15:37:12 +00009except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000010 zlib = None
11
Skip Montanaro40fc1602001-03-01 04:27:19 +000012__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Ronald Oussoren143cefb2006-06-15 08:14:18 +000013 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000014
class BadZipfile(Exception):
    """Raised when a file cannot be read as a valid ZIP archive."""


class LargeZipFile(Exception):
    """Raised while writing an archive that would need the ZIP64
    extensions when those extensions are disabled.
    """


# "error" is the historical name of the exception raised by this module.
error = BadZipfile
Guido van Rossum32abe6f2000-03-31 17:30:02 +000026
# Largest size that still fits the classic headers; FileHeader() switches to
# the ZIP64 extra field for anything bigger.
ZIP64_LIMIT = 0x7FFFFFFF

# Compression methods understood by this module (no others are supported).
ZIP_STORED = 0
ZIP_DEFLATED = 8

# struct formats and 4-byte magic numbers of the on-disk records.

# End of central directory record: 8 fixed fields, 22 bytes.
structEndArchive = "<4s4H2lH"
stringEndArchive = "PK\005\006"

# Central directory entry: 19 fields, 46 bytes.
structCentralDir = "<4s4B4HlLL5HLl"
stringCentralDir = "PK\001\002"

# Local file header: 12 fields, 30 bytes.
structFileHeader = "<4s2B4HlLL2H"
stringFileHeader = "PK\003\004"

# ZIP64 end of central directory locator: 4 fields, 20 bytes.
structEndArchive64Locator = "<4slql"
stringEndArchive64Locator = "PK\x06\x07"

# ZIP64 end of central directory record: 10 fields, 56 bytes.
structEndArchive64 = "<4sqhhllqqqq"
stringEndArchive64 = "PK\x06\x06"
45
Guido van Rossum32abe6f2000-03-31 17:30:02 +000046
# Positions of the fields in a tuple unpacked with structCentralDir.
(
    _CD_SIGNATURE,
    _CD_CREATE_VERSION,
    _CD_CREATE_SYSTEM,
    _CD_EXTRACT_VERSION,
    _CD_EXTRACT_SYSTEM,             # is this meaningful?
    _CD_FLAG_BITS,
    _CD_COMPRESS_TYPE,
    _CD_TIME,
    _CD_DATE,
    _CD_CRC,
    _CD_COMPRESSED_SIZE,
    _CD_UNCOMPRESSED_SIZE,
    _CD_FILENAME_LENGTH,
    _CD_EXTRA_FIELD_LENGTH,
    _CD_COMMENT_LENGTH,
    _CD_DISK_NUMBER_START,
    _CD_INTERNAL_FILE_ATTRIBUTES,
    _CD_EXTERNAL_FILE_ATTRIBUTES,
    _CD_LOCAL_HEADER_OFFSET,
) = range(19)

# Positions of the fields in a tuple unpacked with structFileHeader.
(
    _FH_SIGNATURE,
    _FH_EXTRACT_VERSION,
    _FH_EXTRACT_SYSTEM,             # is this meaningful?
    _FH_GENERAL_PURPOSE_FLAG_BITS,
    _FH_COMPRESSION_METHOD,
    _FH_LAST_MOD_TIME,
    _FH_LAST_MOD_DATE,
    _FH_CRC,
    _FH_COMPRESSED_SIZE,
    _FH_UNCOMPRESSED_SIZE,
    _FH_FILENAME_LENGTH,
    _FH_EXTRA_FIELD_LENGTH,
) = range(12)
81
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Returns True when an end-of-archive record is found in the file named
    by filename, and False otherwise -- including when the file cannot be
    opened or read (IOError).
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Release the handle even when reading the trailer fails.
            fpin.close()
    except IOError:
        return False
    return bool(endrec)             # a record means the magic number matched
Guido van Rossum32abe6f2000-03-31 17:30:02 +000093
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin is the open archive, offset is the position of the classic
    end-of-archive record relative to the end of the file, and endrec is
    the list built from that record.

    Returns endrec.  It is returned unchanged when the file holds no ZIP64
    locator or ZIP64 end-of-archive record in front of the classic record
    (including when the file is too short to hold them); otherwise items
    1 to 6 are overwritten with the ZIP64 values.

    Raises BadZipfile when the locator describes a multi-disk archive.
    """
    locatorSize = struct.calcsize(structEndArchive64Locator)
    try:
        fpin.seek(offset - locatorSize, 2)
    except IOError:
        # Seeking before the start of the file: there is no room for a
        # ZIP64 locator, so the classic record is all there is.
        return endrec
    data = fpin.read(locatorSize)
    if len(data) != locatorSize:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    endArchiveSize = struct.calcsize(structEndArchive64)
    try:
        fpin.seek(offset - locatorSize - endArchiveSize, 2)
    except IOError:
        return endrec
    data = fpin.read(endArchiveSize)
    if len(data) != endArchiveSize:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[1] = disk_num
    endrec[2] = disk_dir
    endrec[3] = dircount
    endrec[4] = dircount2
    endrec[5] = dirsize
    endrec[6] = diroffset
    return endrec
126
127
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list holding the eight fixed fields of the ZIP "End of
    central dir" record, followed by the archive comment and finally the
    file seek offset of the record.  When the record points at ZIP64 data
    the list is passed through _EndRecData64() first.

    None is returned when no valid record is found, which includes files
    that are too short to contain one.
    """
    try:
        fpin.seek(-22, 2)               # Assume no archive comment.
    except IOError:
        # Fewer than 22 bytes: the file cannot hold the record at all.
        return None
    filesize = fpin.tell() + 22     # Get file size
    data = fpin.read()
    if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
        endrec = list(struct.unpack(structEndArchive, data))
        endrec.append("")               # Append the archive comment
        endrec.append(filesize - 22)    # Append the record start offset
        if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
            return _EndRecData64(fpin, -22, endrec)
        return endrec
    # Search the last END_BLOCK bytes of the file for the record signature.
    # The comment is appended to the ZIP file and has a 16 bit length.
    # So the comment may be up to 64K long.  We limit the search for the
    # signature to a few Kbytes at the end of the file for efficiency.
    # also, the signature must not appear in the comment.
    END_BLOCK = min(filesize, 1024 * 4)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:     # Correct signature string was found
        recData = data[start:start+22]
        if len(recData) != 22:
            # Signature sits too close to the end for a whole record.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+22:]
        if endrec[7] == len(comment):     # Comment length checks out
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(filesize - END_BLOCK + start)
            if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
                return _EndRecData64(fpin, - END_BLOCK + start, endrec)
            return endrec
    return None     # Error, return None
165
Fred Drake484d7352000-10-02 21:14:52 +0000166
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named filename with the given modification time.

        date_time is a (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra record is
        appended to the extra data, the header sizes are written as
        0xffffffff and extract_version / create_version are raised to at
        least 45 on this object.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<hhqq'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff # -1
            compress_size = 0xffffffff # -1
            self.extract_version = max(45, self.extract_version)
            # Each version field is raised on its own, so a higher
            # create_version is never lowered to extract_version.
            self.create_version = max(45, self.create_version)

        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, self.flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(self.filename), len(extra))
        return header + self.filename + extra

    def _decodeExtra(self):
        """Decode the extra field and apply any ZIP64 record it contains.

        Sizes and the header offset that hold the 0xFFFFFFFF (or -1)
        placeholder are replaced by the 64-bit values from the record.
        Raises RuntimeError when a ZIP64 record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4 byte header; a shorter tail cannot be
        # a record and is ignored.
        while len(extra) >= 4:
            # Unsigned fields: a signed length could go negative and keep
            # the loop from ever advancing.
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<qqq', extra[4:28])
                elif ln == 16:
                    counts = unpack('<qq', extra[4:20])
                elif ln == 8:
                    counts = unpack('<q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size == -1 or self.file_size == 0xFFFFFFFF:
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == -1 or self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == -1 or self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx+=1

            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000297
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000298
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    for low-level security.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Build the 256-entry CRC-32 lookup table.

        ZIP encryption scrambles its internal keys with the one-byte CRC32
        primitive; the original authors found a direct implementation
        faster than relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for value in range(256):
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= poly
            table.append(value)
        return table
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        slot = (crc ^ ord(ch)) & 0xff
        shifted = (crc >> 8) & 0xffffff
        return shifted ^ self.crctable[slot]

    def __init__(self, pwd):
        """Seed the three keys and mix in every character of pwd."""
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for ch in pwd:
            self._UpdateKeys(ch)

    def _UpdateKeys(self, c):
        """Advance the three keys using the plaintext character c."""
        mask = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        mixed = (self.key1 + (self.key0 & 255)) & mask
        self.key1 = (mixed * 134775813 + 1) & mask
        top_byte = chr((self.key1 >> 24) & 255)
        self.key2 = self._crc32(top_byte, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        k = self.key2 | 2
        stream_byte = ((k * (k ^ 1)) >> 8) & 255
        plain = chr(ord(c) ^ stream_byte)
        self._UpdateKeys(plain)
        return plain
357
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap fileobj, which yields the member's stored bytes.

        zipinfo supplies compress_type, compress_size and filename;
        decrypt, when given, is called on every raw character read.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = ''
        self.readbuffer = ''
        self.linebuffer = ''
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = ("\n", )
        self.lastdiscard = ''

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed  = False
        self.mode    = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Turn universal newline handling for readline() on or off."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = ("\n", )
        if self.univ_newlines:
            self.nlSeps = ("\r\n", "\r", "\n")

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of data."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object closed (the underlying file is left alone)."""
        self.closed = True

    def _checkfornewline(self):
        """Locate the first line separator in the line buffer.

        Returns (position, separator length), or (-1, -1) when the buffer
        holds no separator.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
                self.linebuffer = self.linebuffer[1:]

            # Keep the separator that occurs earliest in the buffer.  On a
            # tie the one listed first wins, so "\r\n" beats a bare "\r".
            for sep in self.nlSeps:
                pos = self.linebuffer.find(sep)
                if pos >= 0 and (nl < 0 or pos < nl):
                    nl, nllen = pos, len(sep)

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.

        A complete line is returned with "\\n" as its terminator whatever
        separator was found.  A line cut short by size, or a final line
        without separator, is returned without one.
        """
        if size < 0:
            size = sys.maxint
        elif size == 0:
            return ''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            if size < nl:
                # The caller wants less than the whole line: hand out
                # exactly size characters and keep the remainder, separator
                # included, for the next call.
                piece = self.linebuffer[:size]
                self.linebuffer = self.linebuffer[size:]
                self.lastdiscard = ''
                return piece
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = ''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + "\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to size characters of member data.

        With size None or negative, everything that remains is returned.
        """
        # act like file() obj and return empty string if size is 0
        if size == 0:
            return ''

        # a negative size means "no limit", exactly like None
        if size is not None and size < 0:
            size = None

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None and size >= 0:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            chunk = self.fileobj.read(bytesToRead)
            self.bytes_read += len(chunk)
            self.rawbuffer += chunk

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = ''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = ''.join(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type == ZIP_DEFLATED:
                    newdata = self.dc.decompress(newdata)
                    self.rawbuffer = self.dc.unconsumed_tail
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self.readbuffer += newdata


        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            result = self.readbuffer
            self.readbuffer = ''
        else:
            result = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return result
Tim Petersea5962f2007-03-12 18:07:52 +0000550
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000551
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000552class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.

    """
Fred Drake484d7352000-10-02 21:14:52 +0000566
Fred Drake90eac282001-02-28 05:29:34 +0000567 fp = None # Set here since __del__ checks it
568
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000569 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000570 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000571 self._allowZip64 = allowZip64
572 self._didModify = False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000573 if compression == ZIP_STORED:
574 pass
575 elif compression == ZIP_DEFLATED:
576 if not zlib:
577 raise RuntimeError,\
Fred Drake5db246d2000-09-29 20:44:48 +0000578 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000579 else:
580 raise RuntimeError, "That compression method is not supported"
Tim Peterse1190062001-01-15 03:34:38 +0000581 self.debug = 0 # Level of printing: 0 through 3
582 self.NameToInfo = {} # Find file info given name
583 self.filelist = [] # List of ZipInfo instances for archive
584 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000585 self.mode = key = mode.replace('b', '')[0]
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000586 self.pwd = None
Tim Petersa19a1682001-03-29 04:36:09 +0000587
Fred Drake3d9091e2001-03-26 15:49:24 +0000588 # Check if we were passed a file-like object
Walter Dörwald65230a22002-06-03 15:58:32 +0000589 if isinstance(file, basestring):
Fred Drake3d9091e2001-03-26 15:49:24 +0000590 self._filePassed = 0
591 self.filename = file
592 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Martin v. Löwis84f6de92007-02-13 10:10:39 +0000593 try:
594 self.fp = open(file, modeDict[mode])
595 except IOError:
596 if mode == 'a':
597 mode = key = 'w'
598 self.fp = open(file, modeDict[mode])
599 else:
600 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000601 else:
602 self._filePassed = 1
603 self.fp = file
604 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000605
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000606 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000607 self._GetContents()
608 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000609 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000610 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000611 try: # See if file is a zip file
612 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000613 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000614 self.fp.seek(self.start_dir, 0)
615 except BadZipfile: # file is not a zip file, just append
616 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000617 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000618 if not self._filePassed:
619 self.fp.close()
620 self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000621 raise RuntimeError, 'Mode must be "r", "w" or "a"'
622
623 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000624 """Read the directory, making sure we close the file if the format
625 is bad."""
626 try:
627 self._RealGetContents()
628 except BadZipfile:
629 if not self._filePassed:
630 self.fp.close()
631 self.fp = None
632 raise
633
634 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000635 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000636 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000637 endrec = _EndRecData(fp)
638 if not endrec:
639 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000640 if self.debug > 1:
641 print endrec
Tim Peterse1190062001-01-15 03:34:38 +0000642 size_cd = endrec[5] # bytes in central directory
643 offset_cd = endrec[6] # offset of central directory
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000644 self.comment = endrec[8] # archive comment
645 # endrec[9] is the offset of the "End of Central Dir" record
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000646 if endrec[9] > ZIP64_LIMIT:
647 x = endrec[9] - size_cd - 56 - 20
648 else:
649 x = endrec[9] - size_cd
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000650 # "concat" is zero, unless zip was concatenated to another file
651 concat = x - offset_cd
652 if self.debug > 2:
653 print "given, inferred, offset", offset_cd, x, concat
654 # self.start_dir: Position of start of central directory
655 self.start_dir = offset_cd + concat
656 fp.seek(self.start_dir, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000657 data = fp.read(size_cd)
658 fp = cStringIO.StringIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000659 total = 0
660 while total < size_cd:
661 centdir = fp.read(46)
662 total = total + 46
663 if centdir[0:4] != stringCentralDir:
664 raise BadZipfile, "Bad magic number for central directory"
665 centdir = struct.unpack(structCentralDir, centdir)
666 if self.debug > 2:
667 print centdir
Fred Drake3e038e52001-02-28 17:56:26 +0000668 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000669 # Create ZipInfo instance to store file information
670 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000671 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
672 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
673 total = (total + centdir[_CD_FILENAME_LENGTH]
674 + centdir[_CD_EXTRA_FIELD_LENGTH]
675 + centdir[_CD_COMMENT_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000676 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000677 (x.create_version, x.create_system, x.extract_version, x.reserved,
678 x.flag_bits, x.compress_type, t, d,
679 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
680 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
681 # Convert date/time code to (year, month, day, hour, min, sec)
682 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000683 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000684
685 x._decodeExtra()
686 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000687 self.filelist.append(x)
688 self.NameToInfo[x.filename] = x
689 if self.debug > 2:
690 print "total", total
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000691
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000692
693 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000694 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000695 l = []
696 for data in self.filelist:
697 l.append(data.filename)
698 return l
699
700 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000701 """Return a list of class ZipInfo instances for files in the
702 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000703 return self.filelist
704
705 def printdir(self):
Fred Drake484d7352000-10-02 21:14:52 +0000706 """Print a table of contents for the zip file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000707 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
708 for zinfo in self.filelist:
709 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
710 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
711
712 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000713 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000714 for zinfo in self.filelist:
715 try:
Tim Peterse1190062001-01-15 03:34:38 +0000716 self.read(zinfo.filename) # Check CRC-32
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000717 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000718 return zinfo.filename
719
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000720
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000721 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000722 """Return the instance of ZipInfo given 'name'."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000723 return self.NameToInfo[name]
724
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000725 def setpassword(self, pwd):
726 """Set default password for encrypted files."""
727 self.pwd = pwd
728
729 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000730 """Return file bytes (as a string) for name."""
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000731 return self.open(name, "r", pwd).read()
732
733 def open(self, name, mode="r", pwd=None):
734 """Return file-like object for 'name'."""
735 if mode not in ("r", "U", "rU"):
736 raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000737 if not self.fp:
738 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000739 "Attempt to read ZIP archive that was already closed"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000740
Tim Petersea5962f2007-03-12 18:07:52 +0000741 # Only open a new file for instances where we were not
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000742 # given a file object in the constructor
743 if self._filePassed:
744 zef_file = self.fp
745 else:
746 zef_file = open(self.filename, 'rb')
747
748 # Get info object for name
749 zinfo = self.getinfo(name)
750
751 filepos = zef_file.tell()
752
753 zef_file.seek(zinfo.header_offset, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000754
755 # Skip the file header:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000756 fheader = zef_file.read(30)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000757 if fheader[0:4] != stringFileHeader:
758 raise BadZipfile, "Bad magic number for file header"
759
760 fheader = struct.unpack(structFileHeader, fheader)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000761 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000762 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000763 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000764
765 if fname != zinfo.orig_filename:
766 raise BadZipfile, \
767 'File name in directory "%s" and header "%s" differ.' % (
768 zinfo.orig_filename, fname)
769
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000770 # check for encrypted flag & handle password
771 is_encrypted = zinfo.flag_bits & 0x1
772 zd = None
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000773 if is_encrypted:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000774 if not pwd:
775 pwd = self.pwd
776 if not pwd:
777 raise RuntimeError, "File %s is encrypted, " \
778 "password required for extraction" % name
779
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000780 zd = _ZipDecrypter(pwd)
781 # The first 12 bytes in the cypher stream is an encryption header
782 # used to strengthen the algorithm. The first 11 bytes are
783 # completely random, while the 12th contains the MSB of the CRC,
784 # and is used to check the correctness of the password.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000785 bytes = zef_file.read(12)
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000786 h = map(zd, bytes[0:12])
787 if ord(h[11]) != ((zinfo.CRC>>24)&255):
788 raise RuntimeError, "Bad password for file %s" % name
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000789
790 # build and return a ZipExtFile
791 if zd is None:
792 zef = ZipExtFile(zef_file, zinfo)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000793 else:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000794 zef = ZipExtFile(zef_file, zinfo, zd)
795
796 # set universal newlines on ZipExtFile if necessary
797 if "U" in mode:
798 zef.set_univ_newlines(True)
799 return zef
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000800
801 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000802 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000803 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000804 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000805 print "Duplicate name:", zinfo.filename
806 if self.mode not in ("w", "a"):
807 raise RuntimeError, 'write() requires mode "w" or "a"'
808 if not self.fp:
809 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000810 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000811 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
812 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000813 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000814 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
815 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000816 "That compression method is not supported"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000817 if zinfo.file_size > ZIP64_LIMIT:
818 if not self._allowZip64:
819 raise LargeZipFile("Filesize would require ZIP64 extensions")
820 if zinfo.header_offset > ZIP64_LIMIT:
821 if not self._allowZip64:
822 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000823
824 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +0000825 """Put the bytes from filename into the archive under the name
826 arcname."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000827 st = os.stat(filename)
Raymond Hettinger32200ae2002-06-01 19:51:15 +0000828 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000829 date_time = mtime[0:6]
830 # Create ZipInfo instance to store file information
831 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +0000832 arcname = filename
833 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
834 while arcname[0] in (os.sep, os.altsep):
835 arcname = arcname[1:]
836 zinfo = ZipInfo(arcname, date_time)
Andrew M. Kuchling55430212004-07-10 15:40:29 +0000837 zinfo.external_attr = (st[0] & 0xFFFF) << 16L # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000838 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +0000839 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000840 else:
Tim Peterse1190062001-01-15 03:34:38 +0000841 zinfo.compress_type = compress_type
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000842
843 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +0000844 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +0000845 zinfo.header_offset = self.fp.tell() # Start of header bytes
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000846
847 self._writecheck(zinfo)
848 self._didModify = True
849 fp = open(filename, "rb")
Finn Bock03a3bb82001-09-05 18:40:33 +0000850 # Must overwrite CRC and sizes with correct data later
851 zinfo.CRC = CRC = 0
852 zinfo.compress_size = compress_size = 0
853 zinfo.file_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000854 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000855 if zinfo.compress_type == ZIP_DEFLATED:
856 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
857 zlib.DEFLATED, -15)
858 else:
859 cmpr = None
860 while 1:
861 buf = fp.read(1024 * 8)
862 if not buf:
863 break
864 file_size = file_size + len(buf)
865 CRC = binascii.crc32(buf, CRC)
866 if cmpr:
867 buf = cmpr.compress(buf)
868 compress_size = compress_size + len(buf)
869 self.fp.write(buf)
870 fp.close()
871 if cmpr:
872 buf = cmpr.flush()
873 compress_size = compress_size + len(buf)
874 self.fp.write(buf)
875 zinfo.compress_size = compress_size
876 else:
877 zinfo.compress_size = file_size
878 zinfo.CRC = CRC
879 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +0000880 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +0000881 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +0000882 self.fp.seek(zinfo.header_offset + 14, 0)
Brett Cannonff450f72004-07-10 19:09:20 +0000883 self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000884 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +0000885 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000886 self.filelist.append(zinfo)
887 self.NameToInfo[zinfo.filename] = zinfo
888
Just van Rossumb083cb32002-12-12 12:23:32 +0000889 def writestr(self, zinfo_or_arcname, bytes):
Fred Drake484d7352000-10-02 21:14:52 +0000890 """Write a file into the archive. The contents is the string
Just van Rossumb083cb32002-12-12 12:23:32 +0000891 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
892 the name of the file in the archive."""
893 if not isinstance(zinfo_or_arcname, ZipInfo):
894 zinfo = ZipInfo(filename=zinfo_or_arcname,
895 date_time=time.localtime(time.time()))
896 zinfo.compress_type = self.compression
897 else:
898 zinfo = zinfo_or_arcname
Tim Peterse1190062001-01-15 03:34:38 +0000899 zinfo.file_size = len(bytes) # Uncompressed size
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000900 zinfo.header_offset = self.fp.tell() # Start of header bytes
901 self._writecheck(zinfo)
902 self._didModify = True
Tim Peterse1190062001-01-15 03:34:38 +0000903 zinfo.CRC = binascii.crc32(bytes) # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000904 if zinfo.compress_type == ZIP_DEFLATED:
905 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
906 zlib.DEFLATED, -15)
907 bytes = co.compress(bytes) + co.flush()
Tim Peterse1190062001-01-15 03:34:38 +0000908 zinfo.compress_size = len(bytes) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000909 else:
910 zinfo.compress_size = zinfo.file_size
Tim Peterse1190062001-01-15 03:34:38 +0000911 zinfo.header_offset = self.fp.tell() # Start of header bytes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000912 self.fp.write(zinfo.FileHeader())
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000913 self.fp.write(bytes)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000914 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000915 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +0000916 # Write CRC and file sizes after the file data
Brett Cannonff450f72004-07-10 19:09:20 +0000917 self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +0000918 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000919 self.filelist.append(zinfo)
920 self.NameToInfo[zinfo.filename] = zinfo
921
922 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +0000923 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +0000924 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000925
926 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +0000927 """Close the file, and for mode "w" and "a" write the ending
928 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +0000929 if self.fp is None:
930 return
Tim Petersa608bb22006-06-15 18:06:29 +0000931
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000932 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000933 count = 0
934 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +0000935 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000936 count = count + 1
937 dt = zinfo.date_time
938 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +0000939 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000940 extra = []
941 if zinfo.file_size > ZIP64_LIMIT \
942 or zinfo.compress_size > ZIP64_LIMIT:
943 extra.append(zinfo.file_size)
944 extra.append(zinfo.compress_size)
945 file_size = 0xffffffff #-1
946 compress_size = 0xffffffff #-1
947 else:
948 file_size = zinfo.file_size
949 compress_size = zinfo.compress_size
950
951 if zinfo.header_offset > ZIP64_LIMIT:
952 extra.append(zinfo.header_offset)
Tim Petersf79c32d2006-07-31 02:53:03 +0000953 header_offset = -1 # struct "l" format: 32 one bits
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000954 else:
955 header_offset = zinfo.header_offset
956
957 extra_data = zinfo.extra
958 if extra:
959 # Append a ZIP64 field to the extra's
960 extra_data = struct.pack(
961 '<hh' + 'q'*len(extra),
962 1, 8*len(extra), *extra) + extra_data
Tim Petersa608bb22006-06-15 18:06:29 +0000963
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000964 extract_version = max(45, zinfo.extract_version)
965 create_version = max(45, zinfo.create_version)
966 else:
967 extract_version = zinfo.extract_version
968 create_version = zinfo.create_version
969
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000970 centdir = struct.pack(structCentralDir,
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000971 stringCentralDir, create_version,
972 zinfo.create_system, extract_version, zinfo.reserved,
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000973 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000974 zinfo.CRC, compress_size, file_size,
975 len(zinfo.filename), len(extra_data), len(zinfo.comment),
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000976 0, zinfo.internal_attr, zinfo.external_attr,
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000977 header_offset)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000978 self.fp.write(centdir)
979 self.fp.write(zinfo.filename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000980 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000981 self.fp.write(zinfo.comment)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000982
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000983 pos2 = self.fp.tell()
984 # Write end-of-zip-archive record
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000985 if pos1 > ZIP64_LIMIT:
986 # Need to write the ZIP64 end-of-archive records
987 zip64endrec = struct.pack(
988 structEndArchive64, stringEndArchive64,
989 44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
990 self.fp.write(zip64endrec)
991
992 zip64locrec = struct.pack(
Tim Petersa608bb22006-06-15 18:06:29 +0000993 structEndArchive64Locator,
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000994 stringEndArchive64Locator, 0, pos2, 1)
995 self.fp.write(zip64locrec)
996
Tim Peters352bf0d2006-07-31 02:40:23 +0000997 # XXX Why is `pos3` computed next? It's never referenced.
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000998 pos3 = self.fp.tell()
999 endrec = struct.pack(structEndArchive, stringEndArchive,
Tim Peters352bf0d2006-07-31 02:40:23 +00001000 0, 0, count, count, pos2 - pos1, -1, 0)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001001 self.fp.write(endrec)
1002
1003 else:
1004 endrec = struct.pack(structEndArchive, stringEndArchive,
1005 0, 0, count, count, pos2 - pos1, pos1, 0)
1006 self.fp.write(endrec)
Guido van Rossumf85af612001-04-14 16:45:14 +00001007 self.fp.flush()
Fred Drake3d9091e2001-03-26 15:49:24 +00001008 if not self._filePassed:
1009 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001010 self.fp = None
1011
1012
1013class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001014 """Class to create ZIP archives with Python library files and packages."""
1015
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001016 def writepy(self, pathname, basename = ""):
1017 """Add all files from "pathname" to the ZIP archive.
1018
Fred Drake484d7352000-10-02 21:14:52 +00001019 If pathname is a package directory, search the directory and
1020 all package subdirectories recursively for all *.py and enter
1021 the modules into the archive. If pathname is a plain
1022 directory, listdir *.py and enter all modules. Else, pathname
1023 must be a Python *.py file and the module will be put into the
1024 archive. Added modules are always module.pyo or module.pyc.
1025 This method will compile the module.py into module.pyc if
1026 necessary.
1027 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001028 dir, name = os.path.split(pathname)
1029 if os.path.isdir(pathname):
1030 initname = os.path.join(pathname, "__init__.py")
1031 if os.path.isfile(initname):
1032 # This is a package directory, add it
1033 if basename:
1034 basename = "%s/%s" % (basename, name)
1035 else:
1036 basename = name
1037 if self.debug:
1038 print "Adding package in", pathname, "as", basename
1039 fname, arcname = self._get_codename(initname[0:-3], basename)
1040 if self.debug:
1041 print "Adding", arcname
1042 self.write(fname, arcname)
1043 dirlist = os.listdir(pathname)
1044 dirlist.remove("__init__.py")
1045 # Add all *.py files and package subdirectories
1046 for filename in dirlist:
1047 path = os.path.join(pathname, filename)
1048 root, ext = os.path.splitext(filename)
1049 if os.path.isdir(path):
1050 if os.path.isfile(os.path.join(path, "__init__.py")):
1051 # This is a package directory, add it
1052 self.writepy(path, basename) # Recursive call
1053 elif ext == ".py":
1054 fname, arcname = self._get_codename(path[0:-3],
1055 basename)
1056 if self.debug:
1057 print "Adding", arcname
1058 self.write(fname, arcname)
1059 else:
1060 # This is NOT a package directory, add its files at top level
1061 if self.debug:
1062 print "Adding files from directory", pathname
1063 for filename in os.listdir(pathname):
1064 path = os.path.join(pathname, filename)
1065 root, ext = os.path.splitext(filename)
1066 if ext == ".py":
1067 fname, arcname = self._get_codename(path[0:-3],
1068 basename)
1069 if self.debug:
1070 print "Adding", arcname
1071 self.write(fname, arcname)
1072 else:
1073 if pathname[-3:] != ".py":
1074 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001075 'Files added with writepy() must end with ".py"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001076 fname, arcname = self._get_codename(pathname[0:-3], basename)
1077 if self.debug:
1078 print "Adding file", arcname
1079 self.write(fname, arcname)
1080
1081 def _get_codename(self, pathname, basename):
1082 """Return (filename, archivename) for the path.
1083
Fred Drake484d7352000-10-02 21:14:52 +00001084 Given a module name path, return the correct file path and
1085 archive name, compiling if necessary. For example, given
1086 /python/lib/string, return (/python/lib/string.pyc, string).
1087 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001088 file_py = pathname + ".py"
1089 file_pyc = pathname + ".pyc"
1090 file_pyo = pathname + ".pyo"
1091 if os.path.isfile(file_pyo) and \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001092 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
Tim Peterse1190062001-01-15 03:34:38 +00001093 fname = file_pyo # Use .pyo file
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001094 elif not os.path.isfile(file_pyc) or \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001095 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
Fred Drake484d7352000-10-02 21:14:52 +00001096 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001097 if self.debug:
1098 print "Compiling", file_py
Martin v. Löwis0c6774d2003-01-15 11:51:06 +00001099 try:
1100 py_compile.compile(file_py, file_pyc, None, True)
1101 except py_compile.PyCompileError,err:
1102 print err.msg
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001103 fname = file_pyc
1104 else:
1105 fname = file_pyc
1106 archivename = os.path.split(fname)[1]
1107 if basename:
1108 archivename = "%s/%s" % (basename, archivename)
1109 return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001110
1111
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

    'args' is the argument list without the program name and defaults to
    sys.argv[1:].  Prints the usage text and exits with status 1 when the
    arguments do not match one of the supported forms.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
        zipfile.py -l zipfile.zip # Show listing of a zipfile
        zipfile.py -t zipfile.zip # Test if a zipfile is valid
        zipfile.py -e zipfile.zip target # Extract zipfile into target dir
        zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        # testzip() returns the name of the first bad member, if any
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            # NOTE(review): member names are joined onto the target
            # directory unchecked, so names containing ".." or an absolute
            # path can land outside 'out' -- only use on trusted archives.
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                if not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; other kinds of
            # path are ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1184
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()