blob: 9f5669e71b71e166edaa17cac3de6c53671bb053 [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Ronald Oussoren143cefb2006-06-15 08:14:18 +00005import binascii, cStringIO
Guido van Rossum32abe6f2000-03-31 17:30:02 +00006
7try:
Tim Peterse1190062001-01-15 03:34:38 +00008 import zlib # We may need its compression method
Guido van Rossum9c673f32001-04-10 15:37:12 +00009except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000010 zlib = None
11
Skip Montanaro40fc1602001-03-01 04:27:19 +000012__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Ronald Oussoren143cefb2006-06-15 08:14:18 +000013 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000014
class BadZipfile(Exception):
    """Error raised by this module for files it cannot read as a ZIP archive."""


class LargeZipFile(Exception):
    """
    Signals that the zipfile being written needs the ZIP64 extensions
    while those extensions are disabled.
    """


# Historical alias: the exception raised by this module.
error = BadZipfile
Guido van Rossum32abe6f2000-03-31 17:30:02 +000026
# Largest size that is written into the classic 32-bit header fields; sizes
# above this limit are recorded through the ZIP64 extension instead.
ZIP64_LIMIT = (1 << 31) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers.  Every format is
# little-endian with standard sizes ("<"), so the byte counts below are
# exactly what struct.calcsize() reports.
structEndArchive = "<4s4H2LH"     # 8 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006"   # magic number for end of archive record
structCentralDir = "<4s4B4HlLL5HLL"  # 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002"   # magic number for central directory
structFileHeader = "<4s2B4HlLL2H"  # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004"   # magic number for file header
structEndArchive64Locator = "<4slql"  # 4 items, locate Zip64 header, 20 bytes
stringEndArchive64Locator = "PK\x06\x07"  # magic token for locator header
structEndArchive64 = "<4sqhhllqqqq"  # 10 items, end of archive (Zip64), 56 bytes
stringEndArchive64 = "PK\x06\x06"  # magic token for Zip64 header
45
Guido van Rossum32abe6f2000-03-31 17:30:02 +000046
# Positions of the individual fields inside the tuple produced by unpacking
# a central directory record with structCentralDir.
(
    _CD_SIGNATURE,
    _CD_CREATE_VERSION,
    _CD_CREATE_SYSTEM,
    _CD_EXTRACT_VERSION,
    _CD_EXTRACT_SYSTEM,             # is this meaningful?
    _CD_FLAG_BITS,
    _CD_COMPRESS_TYPE,
    _CD_TIME,
    _CD_DATE,
    _CD_CRC,
    _CD_COMPRESSED_SIZE,
    _CD_UNCOMPRESSED_SIZE,
    _CD_FILENAME_LENGTH,
    _CD_EXTRA_FIELD_LENGTH,
    _CD_COMMENT_LENGTH,
    _CD_DISK_NUMBER_START,
    _CD_INTERNAL_FILE_ATTRIBUTES,
    _CD_EXTERNAL_FILE_ATTRIBUTES,
    _CD_LOCAL_HEADER_OFFSET,
) = range(19)

# Positions of the individual fields inside the tuple produced by unpacking
# a local file header with structFileHeader.
(
    _FH_SIGNATURE,
    _FH_EXTRACT_VERSION,
    _FH_EXTRACT_SYSTEM,             # is this meaningful?
    _FH_GENERAL_PURPOSE_FLAG_BITS,
    _FH_COMPRESSION_METHOD,
    _FH_LAST_MOD_TIME,
    _FH_LAST_MOD_DATE,
    _FH_CRC,
    _FH_COMPRESSED_SIZE,
    _FH_UNCOMPRESSED_SIZE,
    _FH_FILENAME_LENGTH,
    _FH_EXTRA_FIELD_LENGTH,
) = range(12)
81
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Return True when _EndRecData() finds an "End of Central Directory"
    record in the file named by `filename`.  Return False when no record is
    found, or when opening/reading the file fails with IOError.
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Release the handle even if the probe itself raises, so a
            # failed check never leaks an open file.
            fpin.close()
    except IOError:
        return False
    return bool(endrec)             # truthy: file has correct magic number
Guido van Rossum32abe6f2000-03-31 17:30:02 +000093
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    `offset` is the position of the classic end-of-archive record relative
    to the end of the file (a negative number); the ZIP64 locator is looked
    for directly in front of it, and the ZIP64 end-of-archive record
    directly in front of the locator.

    `endrec` is returned unchanged when no ZIP64 records are present,
    including the case where the file is too short to hold them.  Raises
    BadZipfile for archives that span multiple disks.
    """
    locatorSize = struct.calcsize(structEndArchive64Locator)
    try:
        fpin.seek(offset - locatorSize, 2)
    except IOError:
        # Seeking before the start of the file: there is no room for a
        # ZIP64 locator, so there is nothing to merge.
        return endrec
    data = fpin.read(locatorSize)
    if len(data) != locatorSize:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    endArchiveSize = struct.calcsize(structEndArchive64)
    try:
        fpin.seek(offset - locatorSize - endArchiveSize, 2)
    except IOError:
        return endrec
    data = fpin.read(endArchiveSize)
    if len(data) != endArchiveSize:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[1] = disk_num
    endrec[2] = disk_dir
    endrec[3] = dircount
    endrec[4] = dircount2
    endrec[5] = dirsize
    endrec[6] = diroffset
    return endrec
126
127
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the eight items unpacked from the ZIP "End of
    central dir" record, followed by the archive comment (index 8) and the
    file seek offset of this record (index 9).  When the central directory
    offset field holds the ZIP64 marker value, the list is passed through
    _EndRecData64() before being returned.

    None is returned when no record can be located, which includes files
    that are too short to contain one.
    """
    try:
        fpin.seek(-22, 2)               # Assume no archive comment.
    except IOError:
        # The file is shorter than the 22-byte record itself.
        return None
    filesize = fpin.tell() + 22     # Get file size
    data = fpin.read()
    if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
        endrec = struct.unpack(structEndArchive, data)
        endrec = list(endrec)
        endrec.append("")               # Append the archive comment
        endrec.append(filesize - 22)    # Append the record start offset
        if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
            return _EndRecData64(fpin, -22, endrec)
        return endrec
    # Search the tail of the file for the record signature.
    # The comment is appended to the ZIP file and has a 16 bit length,
    # so the record can start at most 64K + 22 bytes before the end of the
    # file.  The whole of that range is searched so that archives with a
    # long comment are still recognised.
    # Also, the signature must not appear in the comment.
    END_BLOCK = min(filesize, (1 << 16) + 22)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:     # Correct signature string was found
        endrec = struct.unpack(structEndArchive, data[start:start+22])
        endrec = list(endrec)
        comment = data[start+22:]
        if endrec[7] == len(comment):     # Comment length checks out
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(filesize - END_BLOCK + start)
            if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
                return _EndRecData64(fpin, - END_BLOCK + start, endrec)
            return endrec
    return None     # Error, no usable record
165
Fred Drake484d7352000-10-02 21:14:52 +0000166
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive.

    The constructor fills in the name, timestamp and a set of default
    header values.  header_offset, CRC, compress_size and file_size are
    slots that are NOT initialised here; they are set by class ZipFile.
    """

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named `filename` stamped with `date_time`.

        `date_time` is a (year, month, day, hour, min, sec) sequence.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra record carrying
        the real sizes is appended to the extra data, the header size fields
        are set to 0xffffffff, and extract_version / create_version are
        raised to at least 45 on this object.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<hhqq'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff # -1
            compress_size = 0xffffffff # -1
            self.extract_version = max(45, self.extract_version)
            # Each version field is raised independently; deriving
            # create_version from extract_version would lower a
            # create_version that was already higher.
            self.create_version = max(45, self.create_version)

        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, self.flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(self.filename), len(extra))
        return header + self.filename + extra

    def _decodeExtra(self):
        """Apply ZIP64 values found in self.extra to this object.

        The extra data is walked as a sequence of (tag, length, payload)
        records.  For a tag 1 (ZIP64) record, 64-bit values are consumed in
        order for each of file_size, compress_size and header_offset that
        currently holds the marker value -1 / 0xFFFFFFFF.  Records with
        other tags are skipped.

        Raises RuntimeError when a ZIP64 record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # A record needs at least the 4-byte tag/length pair; shorter
        # trailing bytes cannot be a record and are ignored.
        while len(extra) >= 4:
            # Tag and length are unsigned 16-bit fields.  Reading them as
            # signed would turn lengths >= 0x8000 negative and make the
            # slice at the bottom of the loop skip to the wrong place (or
            # never advance at all).
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<qqq', extra[4:28])
                elif ln == 16:
                    counts = unpack('<qq', extra[4:20])
                elif ln == 8:
                    counts = unpack('<q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size == -1 or self.file_size == 0xFFFFFFFF:
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == -1 or self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == -1 or self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
297 extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000298
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000299
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for value in range(256):
            # Shift the byte through eight rounds, folding in the polynomial
            # whenever a set bit falls off the low end.
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= poly
            table.append(value)
        return table
    # Built once, while the class body executes, and shared by all instances.
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ord(ch)) & 0xff
        shifted = (crc >> 8) & 0xffffff
        return shifted ^ self.crctable[index]

    def __init__(self, pwd):
        """Seed the three keys and mix in every character of `pwd`."""
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for char in pwd:
            self._UpdateKeys(char)

    def _UpdateKeys(self, c):
        """Advance the three keys using the plaintext character `c`."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (self.key0 & 0xFF)) & mask32
        self.key1 = (key1 * 0x08088405 + 1) & mask32
        top_byte = chr((self.key1 >> 24) & 0xFF)
        self.key2 = self._crc32(top_byte, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        k = self.key2 | 2
        keystream = ((k * (k ^ 1)) >> 8) & 0xFF
        plain = chr(ord(c) ^ keystream)
        # The key schedule is driven by the decrypted character.
        self._UpdateKeys(plain)
        return plain
358
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000359class ZipExtFile:
360 """File-like object for reading an archive member.
Tim Petersea5962f2007-03-12 18:07:52 +0000361 Is returned by ZipFile.open().
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000362 """
Tim Petersea5962f2007-03-12 18:07:52 +0000363
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000364 def __init__(self, fileobj, zipinfo, decrypt=None):
365 self.fileobj = fileobj
366 self.decrypter = decrypt
367 self.bytes_read = 0L
368 self.rawbuffer = ''
369 self.readbuffer = ''
370 self.linebuffer = ''
371 self.eof = False
372 self.univ_newlines = False
373 self.nlSeps = ("\n", )
374 self.lastdiscard = ''
375
376 self.compress_type = zipinfo.compress_type
377 self.compress_size = zipinfo.compress_size
Tim Petersea5962f2007-03-12 18:07:52 +0000378
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000379 self.closed = False
380 self.mode = "r"
381 self.name = zipinfo.filename
382
383 # read from compressed files in 64k blocks
384 self.compreadsize = 64*1024
385 if self.compress_type == ZIP_DEFLATED:
386 self.dc = zlib.decompressobj(-15)
387
388 def set_univ_newlines(self, univ_newlines):
389 self.univ_newlines = univ_newlines
Tim Petersea5962f2007-03-12 18:07:52 +0000390
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000391 # pick line separator char(s) based on universal newlines flag
392 self.nlSeps = ("\n", )
393 if self.univ_newlines:
394 self.nlSeps = ("\r\n", "\r", "\n")
395
396 def __iter__(self):
397 return self
Tim Petersea5962f2007-03-12 18:07:52 +0000398
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000399 def next(self):
400 nextline = self.readline()
401 if not nextline:
402 raise StopIteration()
403
404 return nextline
405
406 def close(self):
407 self.closed = True
408
409 def _checkfornewline(self):
410 nl, nllen = -1, -1
411 if self.linebuffer:
412 # ugly check for cases where half of an \r\n pair was
413 # read on the last pass, and the \r was discarded. In this
414 # case we just throw away the \n at the start of the buffer.
415 if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
416 self.linebuffer = self.linebuffer[1:]
417
Tim Petersea5962f2007-03-12 18:07:52 +0000418 for sep in self.nlSeps:
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000419 nl = self.linebuffer.find(sep)
420 if nl >= 0:
421 nllen = len(sep)
422 return nl, nllen
423
424 return nl, nllen
Tim Petersea5962f2007-03-12 18:07:52 +0000425
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000426 def readline(self, size = -1):
427 """Read a line with approx. size. If size is negative,
Tim Petersea5962f2007-03-12 18:07:52 +0000428 read a whole line.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000429 """
430 if size < 0:
431 size = sys.maxint
432 elif size == 0:
433 return ''
434
435 # check for a newline already in buffer
436 nl, nllen = self._checkfornewline()
Tim Petersea5962f2007-03-12 18:07:52 +0000437
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000438 if nl >= 0:
439 # the next line was already in the buffer
440 nl = min(nl, size)
441 else:
442 # no line break in buffer - try to read more
443 size -= len(self.linebuffer)
444 while nl < 0 and size > 0:
445 buf = self.read(min(size, 100))
446 if not buf:
447 break
448 self.linebuffer += buf
449 size -= len(buf)
450
451 # check for a newline in buffer
452 nl, nllen = self._checkfornewline()
Tim Petersea5962f2007-03-12 18:07:52 +0000453
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000454 # we either ran out of bytes in the file, or
455 # met the specified size limit without finding a newline,
456 # so return current buffer
457 if nl < 0:
458 s = self.linebuffer
459 self.linebuffer = ''
460 return s
461
462 buf = self.linebuffer[:nl]
463 self.lastdiscard = self.linebuffer[nl:nl + nllen]
464 self.linebuffer = self.linebuffer[nl + nllen:]
465
466 # line is always returned with \n as newline char (except possibly
467 # for a final incomplete line in the file, which is handled above).
468 return buf + "\n"
469
470 def readlines(self, sizehint = -1):
471 """Return a list with all (following) lines. The sizehint parameter
472 is ignored in this implementation.
473 """
474 result = []
475 while True:
476 line = self.readline()
477 if not line: break
478 result.append(line)
479 return result
480
481 def read(self, size = None):
482 # act like file() obj and return empty string if size is 0
483 if size == 0:
484 return ''
485
486 # determine read size
487 bytesToRead = self.compress_size - self.bytes_read
488
489 # adjust read size for encrypted files since the first 12 bytes
490 # are for the encryption/password information
491 if self.decrypter is not None:
492 bytesToRead -= 12
493
494 if size is not None and size >= 0:
495 if self.compress_type == ZIP_STORED:
496 lr = len(self.readbuffer)
497 bytesToRead = min(bytesToRead, size - lr)
498 elif self.compress_type == ZIP_DEFLATED:
499 if len(self.readbuffer) > size:
500 # the user has requested fewer bytes than we've already
501 # pulled through the decompressor; don't read any more
502 bytesToRead = 0
503 else:
504 # user will use up the buffer, so read some more
505 lr = len(self.rawbuffer)
506 bytesToRead = min(bytesToRead, self.compreadsize - lr)
507
508 # avoid reading past end of file contents
509 if bytesToRead + self.bytes_read > self.compress_size:
510 bytesToRead = self.compress_size - self.bytes_read
511
512 # try to read from file (if necessary)
513 if bytesToRead > 0:
514 bytes = self.fileobj.read(bytesToRead)
515 self.bytes_read += len(bytes)
516 self.rawbuffer += bytes
517
518 # handle contents of raw buffer
519 if self.rawbuffer:
520 newdata = self.rawbuffer
521 self.rawbuffer = ''
522
523 # decrypt new data if we were given an object to handle that
524 if newdata and self.decrypter is not None:
525 newdata = ''.join(map(self.decrypter, newdata))
526
527 # decompress newly read data if necessary
528 if newdata and self.compress_type == ZIP_DEFLATED:
529 newdata = self.dc.decompress(newdata)
530 self.rawbuffer = self.dc.unconsumed_tail
531 if self.eof and len(self.rawbuffer) == 0:
Tim Petersea5962f2007-03-12 18:07:52 +0000532 # we're out of raw bytes (both from the file and
533 # the local buffer); flush just to make sure the
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000534 # decompressor is done
535 newdata += self.dc.flush()
536 # prevent decompressor from being used again
537 self.dc = None
538
539 self.readbuffer += newdata
540
541
542 # return what the user asked for
543 if size is None or len(self.readbuffer) <= size:
544 bytes = self.readbuffer
545 self.readbuffer = ''
546 else:
547 bytes = self.readbuffer[:size]
548 self.readbuffer = self.readbuffer[size:]
549
550 return bytes
Tim Petersea5962f2007-03-12 18:07:52 +0000551
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000552
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000553class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000554 """ Class with methods to open, read, write, close, list zip files.
555
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000556 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +0000557
Fred Drake3d9091e2001-03-26 15:49:24 +0000558 file: Either the path to the file, or a file-like object.
559 If it is a path, the file will be opened and closed by ZipFile.
560 mode: The mode can be either read "r", write "w" or append "a".
561 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000562 allowZip64: if True ZipFile will create files with ZIP64 extensions when
563 needed, otherwise it will raise an exception when this would
564 be necessary.
565
Fred Drake3d9091e2001-03-26 15:49:24 +0000566 """
Fred Drake484d7352000-10-02 21:14:52 +0000567
Fred Drake90eac282001-02-28 05:29:34 +0000568 fp = None # Set here since __del__ checks it
569
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    `file` is either a path (opened here) or an already open file-like
    object (used as is).  For "r" the table of contents is read
    immediately.  For "a" the file position is placed at the start of the
    existing central directory, or at the end of the file when the file is
    not a ZIP archive; when a path cannot be opened for update, it is
    created as for "w".

    Raises RuntimeError for an unknown mode, an unsupported compression
    method, or ZIP_DEFLATED without the zlib module.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
    else:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None

    # Check if we were passed a file-like object
    if isinstance(file, basestring):
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = open(file, modeDict[mode])
        except IOError:
            if mode == 'a':
                # Nothing to update yet: fall back to creating the file.
                mode = key = 'w'
                self.fp = open(file, modeDict[mode])
            else:
                raise
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    # `mode` was validated on entry, so `key` is one of "r", "w" or "a";
    # "w" needs no preparation at all.
    if key == 'r':
        self._GetContents()
    elif key == 'a':
        try:                        # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:          # file is not a zip file, just append
            self.fp.seek(0, 2)
627
def _GetContents(self):
    """Load the directory via _RealGetContents().

    If that fails with BadZipfile, a file that was opened by this object
    (rather than passed in) is closed and self.fp is cleared before the
    exception is re-raised."""
    try:
        self._RealGetContents()
    except BadZipfile:
        opened_here = not self._filePassed
        if opened_here:
            self.fp.close()
            self.fp = None
        raise
638
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Fills self.comment, self.start_dir, self.filelist and self.NameToInfo.
    Raises BadZipfile when no end-of-archive record is found or when a
    central directory entry does not start with the expected signature.
    """
    archive = self.fp
    endrec = _EndRecData(archive)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[5]             # bytes in central directory
    offset_cd = endrec[6]   # offset of central directory
    self.comment = endrec[8]        # archive comment
    # endrec[9] is the offset of the "End of Central Dir" record
    eocd_offset = endrec[9]
    if eocd_offset > ZIP64_LIMIT:
        # 56 and 20 are the sizes of the ZIP64 end-of-archive record and
        # its locator (see the struct format comments).
        inferred = eocd_offset - size_cd - 56 - 20
    else:
        inferred = eocd_offset - size_cd
    # "concat" is zero, unless zip was concatenated to another file
    concat = inferred - offset_cd
    if self.debug > 2:
        print("given, inferred, offset %s %s %s"
              % (offset_cd, inferred, concat))
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    archive.seek(self.start_dir, 0)
    # Parse the directory from an in-memory copy.
    directory = cStringIO.StringIO(archive.read(size_cd))
    total = 0
    while total < size_cd:
        centdir = directory.read(46)
        total = total + 46
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        name_len = centdir[_CD_FILENAME_LENGTH]
        extra_len = centdir[_CD_EXTRA_FIELD_LENGTH]
        comment_len = centdir[_CD_COMMENT_LENGTH]
        # Create ZipInfo instance to store file information
        zinfo = ZipInfo(directory.read(name_len))
        zinfo.extra = directory.read(extra_len)
        zinfo.comment = directory.read(comment_len)
        total = total + name_len + extra_len + comment_len
        zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (zinfo.create_version, zinfo.create_system, zinfo.extract_version,
         zinfo.reserved, zinfo.flag_bits, zinfo.compress_type, t, d,
         zinfo.CRC, zinfo.compress_size, zinfo.file_size) = centdir[1:12]
        (zinfo.volume, zinfo.internal_attr,
         zinfo.external_attr) = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        zinfo._raw_time = t
        zinfo.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        zinfo._decodeExtra()
        zinfo.header_offset = zinfo.header_offset + concat
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        if self.debug > 2:
            print("total %s" % (total,))
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000697
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000698
def namelist(self):
    """Return a new list holding the file name of every archive entry."""
    return [entry.filename for entry in self.filelist]
705
def infolist(self):
    """Return the list of ZipInfo instances, one per file in the archive.

    This is the object's own list (self.filelist), not a copy."""
    entries = self.filelist
    return entries
710
def printdir(self):
    """Print a table of contents for the zip file.

    Writes a heading line followed by one line per entry showing its
    name, modification time and uncompressed size."""
    heading = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(heading)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row)
717
718 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000719 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000720 for zinfo in self.filelist:
721 try:
Tim Peterse1190062001-01-15 03:34:38 +0000722 self.read(zinfo.filename) # Check CRC-32
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000723 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000724 return zinfo.filename
725
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000726
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when the archive has no entry called 'name'."""
    info = self.NameToInfo.get(name)
    if info is not None:
        return info

    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000735
def setpassword(self, pwd):
    """Store `pwd` as the default password for encrypted members.

    The value is kept in ``self.pwd``; open() falls back to it when it
    is called without a password of its own.
    """
    self.pwd = pwd
739
def read(self, name, pwd=None):
    """Return the full contents of member `name` as a string.

    The member is opened with mode "r" (handing `pwd` on to open())
    and then read to the end.
    """
    member = self.open(name, "r", pwd)
    return member.read()
743
def open(self, name, mode="r", pwd=None):
    """Return a file-like object (a ZipExtFile) for reading member 'name'.

    mode must be "r", "U" or "rU"; the "U" variants switch on universal
    newline handling in the returned object.  `pwd` is the password for
    an encrypted member; when it is not given, the password stored by
    setpassword() is used.

    Raises RuntimeError for an unsupported mode, a closed archive, a
    missing password or a wrong password; KeyError (from getinfo()) when
    the archive has no member called `name`; and BadZipfile when the
    local file header has a bad magic number or its file name disagrees
    with the central directory.

    When the archive was given by filename, a fresh file object is
    opened for the member; it is closed again here if anything fails
    before the ZipExtFile has been built.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    opened_here = not self._filePassed
    if opened_here:
        zef_file = open(self.filename, 'rb')
    else:
        zef_file = self.fp

    try:
        # Get info object for name
        zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header (30 bytes is presumably the size of
        # structFileHeader -- defined elsewhere in this module).
        fheader = zef_file.read(30)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            # The extra field is read only to move past it.
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            bytes = zef_file.read(12)
            h = map(zd, bytes[0:12])
            if zinfo.flag_bits & 0x8:
                # compare against the file time from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
        return zef
    except:
        # Do not leak the handle we opened ourselves; a caller-supplied
        # file object is left alone.  The error is always re-raised.
        if opened_here:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000818
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the path it was written to.

    `member' is either a member name or a ZipInfo object; a name is
    looked up with getinfo().  The member is written below `path',
    which defaults to the current working directory.  `pwd' is passed
    on for encrypted members.
    """
    if isinstance(member, ZipInfo):
        zinfo = member
    else:
        zinfo = self.getinfo(member)

    if path is None:
        destination = os.getcwd()
    else:
        destination = path

    return self._extract_member(zinfo, destination, pwd)
832
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() on each of them.

    `members' lists what to extract and defaults to everything returned
    by namelist().  `path' (default: current working directory) and
    `pwd' are handed to extract() unchanged.
    """
    selected = members
    if selected is None:
        selected = self.namelist()

    for each in selected:
        self.extract(each, path, pwd)
844
845 def _extract_member(self, member, targetpath, pwd):
846 """Extract the ZipInfo object 'member' to a physical
847 file on the path targetpath.
848 """
849 # build the destination pathname, replacing
850 # forward slashes to platform specific separators.
851 if targetpath[-1:] == "/":
852 targetpath = targetpath[:-1]
853
854 # don't include leading "/" from file name if present
855 if os.path.isabs(member.filename):
856 targetpath = os.path.join(targetpath, member.filename[1:])
857 else:
858 targetpath = os.path.join(targetpath, member.filename)
859
860 targetpath = os.path.normpath(targetpath)
861
862 # Create all upper directories if necessary.
863 upperdirs = os.path.dirname(targetpath)
864 if upperdirs and not os.path.exists(upperdirs):
865 os.makedirs(upperdirs)
866
867 source = self.open(member.filename, pwd=pwd)
868 target = file(targetpath, "wb")
869 shutil.copyfileobj(source, target)
870 source.close()
871 target.close()
872
873 return targetpath
874
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000875 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000876 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000877 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000878 if self.debug: # Warning for duplicate names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000879 print "Duplicate name:", zinfo.filename
880 if self.mode not in ("w", "a"):
881 raise RuntimeError, 'write() requires mode "w" or "a"'
882 if not self.fp:
883 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000884 "Attempt to write ZIP archive that was already closed"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000885 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
886 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000887 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000888 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
889 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +0000890 "That compression method is not supported"
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000891 if zinfo.file_size > ZIP64_LIMIT:
892 if not self._allowZip64:
893 raise LargeZipFile("Filesize would require ZIP64 extensions")
894 if zinfo.header_offset > ZIP64_LIMIT:
895 if not self._allowZip64:
896 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000897
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    `arcname' defaults to `filename'; in both cases the drive prefix
    and leading path separators are removed.  `compress_type'
    overrides the archive's default compression for this member.

    Raises RuntimeError when the archive is already closed, plus
    whatever _writecheck() raises for the new entry.  The input file
    is always closed again, even when reading or writing fails.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    # The real size is set here only so that _writecheck() can apply
    # its ZIP64 limit; it is reset to 0 below before the header is
    # written.
    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True
    fp = open(filename, "rb")
    try:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            # wbits of -15 selects a raw deflate stream (no zlib
            # header or trailer).
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = binascii.crc32(buf, CRC)
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    finally:
        # Close the input even if reading or writing failed.
        fp.close()
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
966
def writestr(self, zinfo_or_arcname, bytes):
    """Add a member whose contents is the string 'bytes'.

    'zinfo_or_arcname' is either a ZipInfo instance, which is used as
    given, or the member's name; for a name, a ZipInfo is created with
    the current local time and the archive's default compression.

    Raises RuntimeError when the archive is already closed, plus
    whatever _writecheck() raises for the entry.
    """
    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        now = time.localtime(time.time())[:6]
        zinfo = ZipInfo(filename=zinfo_or_arcname, date_time=now)
        zinfo.compress_type = self.compression

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(bytes)            # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum

    payload = bytes
    if zinfo.compress_type == ZIP_DEFLATED:
        deflater = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        payload = deflater.compress(bytes) + deflater.flush()
    # For stored members this equals the uncompressed size.
    zinfo.compress_size = len(payload)

    out = self.fp
    out.write(zinfo.FileHeader())
    out.write(payload)
    out.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        out.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1004
1005 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001006 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001007 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001008
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    The central directory and the end-of-archive record(s) are written
    only when the archive was modified.  Calling close() on an archive
    that is already closed does nothing.  The underlying file is
    closed (unless the caller passed it in) and ``self.fp`` is cleared
    even when writing the ending records fails, so a later close() or
    __del__() does not try to write them again.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                # Values that do not fit the classic 32-bit fields are
                # collected here and emitted in a ZIP64 extra field.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff #-1
                    compress_size = 0xffffffff #-1
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = -1  # struct "l" format:  32 one bits
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                            '<hh' + 'q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                centdir = struct.pack(structCentralDir,
                    stringCentralDir, create_version,
                    zinfo.create_system, extract_version, zinfo.reserved,
                    zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                    zinfo.CRC, compress_size, file_size,
                    len(zinfo.filename), len(extra_data), len(zinfo.comment),
                    0, zinfo.internal_attr, zinfo.external_attr,
                    header_offset)
                self.fp.write(centdir)
                self.fp.write(zinfo.filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            if pos1 > ZIP64_LIMIT:
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)

                # The classic record stores its offset in an unsigned
                # 32-bit field, so "see the ZIP64 record" is written as
                # 0xffffffff rather than -1.
                endrec = struct.pack(structEndArchive, stringEndArchive,
                            0, 0, count, count, pos2 - pos1, 0xffffffff, 0)
                self.fp.write(endrec)

            else:
                endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, count, count, pos2 - pos1, pos1, 0)
                self.fp.write(endrec)
            self.fp.flush()
    finally:
        # Always release the file, even if writing the records failed.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1094
1095
1096class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001097 """Class to create ZIP archives with Python library files and packages."""
1098
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001099 def writepy(self, pathname, basename = ""):
1100 """Add all files from "pathname" to the ZIP archive.
1101
Fred Drake484d7352000-10-02 21:14:52 +00001102 If pathname is a package directory, search the directory and
1103 all package subdirectories recursively for all *.py and enter
1104 the modules into the archive. If pathname is a plain
1105 directory, listdir *.py and enter all modules. Else, pathname
1106 must be a Python *.py file and the module will be put into the
1107 archive. Added modules are always module.pyo or module.pyc.
1108 This method will compile the module.py into module.pyc if
1109 necessary.
1110 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001111 dir, name = os.path.split(pathname)
1112 if os.path.isdir(pathname):
1113 initname = os.path.join(pathname, "__init__.py")
1114 if os.path.isfile(initname):
1115 # This is a package directory, add it
1116 if basename:
1117 basename = "%s/%s" % (basename, name)
1118 else:
1119 basename = name
1120 if self.debug:
1121 print "Adding package in", pathname, "as", basename
1122 fname, arcname = self._get_codename(initname[0:-3], basename)
1123 if self.debug:
1124 print "Adding", arcname
1125 self.write(fname, arcname)
1126 dirlist = os.listdir(pathname)
1127 dirlist.remove("__init__.py")
1128 # Add all *.py files and package subdirectories
1129 for filename in dirlist:
1130 path = os.path.join(pathname, filename)
1131 root, ext = os.path.splitext(filename)
1132 if os.path.isdir(path):
1133 if os.path.isfile(os.path.join(path, "__init__.py")):
1134 # This is a package directory, add it
1135 self.writepy(path, basename) # Recursive call
1136 elif ext == ".py":
1137 fname, arcname = self._get_codename(path[0:-3],
1138 basename)
1139 if self.debug:
1140 print "Adding", arcname
1141 self.write(fname, arcname)
1142 else:
1143 # This is NOT a package directory, add its files at top level
1144 if self.debug:
1145 print "Adding files from directory", pathname
1146 for filename in os.listdir(pathname):
1147 path = os.path.join(pathname, filename)
1148 root, ext = os.path.splitext(filename)
1149 if ext == ".py":
1150 fname, arcname = self._get_codename(path[0:-3],
1151 basename)
1152 if self.debug:
1153 print "Adding", arcname
1154 self.write(fname, arcname)
1155 else:
1156 if pathname[-3:] != ".py":
1157 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001158 'Files added with writepy() must end with ".py"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001159 fname, arcname = self._get_codename(pathname[0:-3], basename)
1160 if self.debug:
1161 print "Adding file", arcname
1162 self.write(fname, arcname)
1163
1164 def _get_codename(self, pathname, basename):
1165 """Return (filename, archivename) for the path.
1166
Fred Drake484d7352000-10-02 21:14:52 +00001167 Given a module name path, return the correct file path and
1168 archive name, compiling if necessary. For example, given
1169 /python/lib/string, return (/python/lib/string.pyc, string).
1170 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001171 file_py = pathname + ".py"
1172 file_pyc = pathname + ".pyc"
1173 file_pyo = pathname + ".pyo"
1174 if os.path.isfile(file_pyo) and \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001175 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
Tim Peterse1190062001-01-15 03:34:38 +00001176 fname = file_pyo # Use .pyo file
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001177 elif not os.path.isfile(file_pyc) or \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001178 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
Fred Drake484d7352000-10-02 21:14:52 +00001179 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001180 if self.debug:
1181 print "Compiling", file_py
Martin v. Löwis0c6774d2003-01-15 11:51:06 +00001182 try:
1183 py_compile.compile(file_py, file_pyc, None, True)
1184 except py_compile.PyCompileError,err:
1185 print err.msg
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001186 fname = file_pyc
1187 else:
1188 fname = file_pyc
1189 archivename = os.path.split(fname)[1]
1190 if basename:
1191 archivename = "%s/%s" % (basename, archivename)
1192 return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001193
1194
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

    `args` is the argument list without the program name and defaults
    to sys.argv[1:].  On a missing or malformed command the usage text
    is printed and the process exits with status 1.

      -l zipfile          print the table of contents
      -t zipfile          read every member and report a corrupted one
      -e zipfile target   extract all members into directory `target`
      -c zipfile src ...  create the zipfile from files/directories

    The archive is closed in every mode, also when an error occurs.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            # testzip() returns the name of the first bad member, if any.
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r"
                  % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            # extractall() creates missing directories and strips
            # leading "/" and "./" from member names.
            zf.extractall(args[2])
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            """Add file `path`, or a directory tree, as `zippath`."""
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1267
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()