blob: 4791aea5ad45fffac8a3bb19c2f3c0ebfcc4daef [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Martin v. Löwis00756902006-02-05 17:09:41 +00006import struct, os, time, sys
Guido van Rossum68937b42007-05-18 00:51:22 +00007import binascii, io
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
9try:
Tim Peterse1190062001-01-15 03:34:38 +000010 import zlib # We may need its compression method
Guido van Rossum9c673f32001-04-10 15:37:12 +000011except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000012 zlib = None
13
# Public names exported by a star-import of this module.
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000016
class BadZipfile(Exception):
    """Exception raised by this module for a file that is not a valid ZIP archive."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +000019
20
class LargeZipFile(Exception):
    """Raised while writing a zipfile that requires ZIP64 extensions
    when those extensions are disabled."""
26
# Alias of BadZipfile; both names are listed in __all__.
error = BadZipfile      # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000028
# Largest size (2**31 - 1) that ZipInfo.FileHeader() stores directly; larger
# sizes are written through the ZIP64 extra field instead.
ZIP64_LIMIT = 0x7FFFFFFF

# Compression methods handled by this module (no other ZIP method is
# supported).
ZIP_STORED = 0
ZIP_DEFLATED = 8

# struct formats and 4-byte signatures of the on-disk records.

# End-of-archive record: 8 fixed fields, 22 bytes (the archive comment follows).
structEndArchive = "<4s4H2lH"
stringEndArchive = b"PK\x05\x06"

# Central directory entry: 19 fields, 46 bytes.
structCentralDir = "<4s4B4HlLL5HLl"
stringCentralDir = b"PK\x01\x02"

# Local file header: 12 fields, 30 bytes.
structFileHeader = "<4s2B4HlLL2H"
stringFileHeader = b"PK\x03\x04"

# ZIP64 end-of-archive locator: 4 fields, 20 bytes.
structEndArchive64Locator = "<4slql"
stringEndArchive64Locator = b"PK\x06\x07"

# ZIP64 end-of-archive record: 10 fields, 56 bytes.
structEndArchive64 = "<4sqhhllqqqq"
stringEndArchive64 = b"PK\x06\x06"


# Positions of the fields in a tuple unpacked with structCentralDir.
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4                  # is this meaningful?
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# Positions of the fields in a tuple unpacked with structFileHeader.
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2                  # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
83
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Returns True when _EndRecData() finds an "End of Central Directory"
    record in the file, and False when it does not or when opening or
    reading the file raises IOError.
    """
    try:
        fpin = io.open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Release the handle even when parsing the end record raises.
            fpin.close()
    except IOError:
        return False
    # A non-empty end record means the file has the correct magic number.
    return bool(endrec)
Guido van Rossum32abe6f2000-03-31 17:30:02 +000095
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin   -- seekable binary file object holding the archive
    offset -- position of the classic end-of-archive record, relative to
              the end of the file (whence 2)
    endrec -- list built by _EndRecData(); items 1 through 6 are
              overwritten in place with the ZIP64 values

    Returns endrec.  It is returned unchanged when the file is too short to
    hold the ZIP64 records or when their signatures do not match.  Raises
    BadZipfile for archives that span multiple disks.
    """
    locatorSize = struct.calcsize(structEndArchive64Locator)
    try:
        fpin.seek(offset - locatorSize, 2)
    except IOError:
        # The file is not large enough to contain a ZIP64 locator.
        return endrec
    data = fpin.read(locatorSize)
    if len(data) != locatorSize:
        return endrec
    sig, diskno, _reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    endArchiveSize = struct.calcsize(structEndArchive64)
    try:
        fpin.seek(offset - locatorSize - endArchiveSize, 2)
    except IOError:
        return endrec
    data = fpin.read(endArchiveSize)
    if len(data) != endArchiveSize:
        return endrec
    (sig, _size, _create_version, _read_version, disk_num, disk_dir,
     dircount, dircount2, dirsize, diroffset) = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[1:7] = [disk_num, disk_dir, dircount, dircount2, dirsize, diroffset]
    return endrec
128
129
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list holding the eight fields unpacked from the ZIP
    "End of central dir" record, followed by the archive comment (index 8,
    bytes) and the file seek offset of the record (index 9).  When the
    directory offset field holds the ZIP64 marker the list is passed through
    _EndRecData64() before being returned.

    None is returned when no valid record is found, including when the file
    is too short to hold one.
    """
    try:
        fpin.seek(-22, 2)               # Assume no archive comment.
    except IOError:
        # The file is shorter than a bare end-of-archive record.
        return None
    filesize = fpin.tell() + 22         # Get file size
    data = fpin.read()
    if (len(data) == 22 and data[0:4] == stringEndArchive
            and data[-2:] == b"\000\000"):
        endrec = list(struct.unpack(structEndArchive, data))
        endrec.append(b"")              # Append the (empty) archive comment
        endrec.append(filesize - 22)    # Append the record start offset
        if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
            return _EndRecData64(fpin, -22, endrec)
        return endrec
    # Search the last END_BLOCK bytes of the file for the record signature.
    # The comment is appended to the ZIP file and has a 16 bit length.
    # So the comment may be up to 64K long.  We limit the search for the
    # signature to a few Kbytes at the end of the file for efficiency.
    # also, the signature must not appear in the comment.
    END_BLOCK = min(filesize, 1024 * 4)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:                      # Correct signature string was found
        record = data[start:start+22]
        if len(record) != 22:
            # Signature found, but the fixed part of the record is cut short.
            return None
        endrec = list(struct.unpack(structEndArchive, record))
        comment = data[start+22:]
        if endrec[7] == len(comment):   # Comment length checks out
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(filesize - END_BLOCK + start)
            if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
                return _EndRecData64(fpin, - END_BLOCK + start, endrec)
            return endrec
    return None                         # Error, return None
167
Fred Drake484d7352000-10-02 21:14:52 +0000168
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for 'filename' with standard attribute values.

        filename  -- member name; it is cut at the first null character and
                     os.sep is replaced by "/"
        date_time -- (year, month, day, hour, min, sec) tuple
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0      # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3      # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as bytes.

        The result is the packed fixed-size header followed by the UTF-8
        encoded file name and the extra data.  When a size exceeds
        ZIP64_LIMIT a ZIP64 extra field is appended and extract_version /
        create_version are raised to at least 45 on this instance.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<hhqq'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff # -1
            compress_size = 0xffffffff # -1
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        # The length field must count the bytes actually written, not the
        # characters of the name, so encode before measuring.
        filename = self.filename.encode("utf-8")
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, self.flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _decodeExtra(self):
        """Decode the extra field and apply any ZIP64 sizes/offset it holds.

        For each ZIP64 record (type 1) the 64-bit values replace, in order,
        file_size, compress_size and header_offset wherever the attribute
        holds the -1 / 0xFFFFFFFF marker.  Raises RuntimeError when a ZIP64
        record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header; stop when
        # fewer than 4 bytes remain.
        while len(extra) >= 4:
            # Unsigned: a length read as negative would make the slice at the
            # bottom of the loop stop advancing.
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<qqq', extra[4:28])
                elif ln == 16:
                    counts = unpack('<qq', extra[4:20])
                elif ln == 8:
                    counts = unpack('<q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size == -1 or self.file_size == 0xFFFFFFFF:
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == -1 or self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == -1 or self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000299
300
Thomas Wouterscf297e42007-02-23 15:07:44 +0000301class _ZipDecrypter:
302 """Class to handle decryption of files stored within a ZIP archive.
303
304 ZIP supports a password-based form of encryption. Even though known
305 plaintext attacks have been found against it, it is still useful
306 for low-level securicy.
307
308 Usage:
309 zd = _ZipDecrypter(mypwd)
310 plain_char = zd(cypher_char)
311 plain_text = map(zd, cypher_text)
312 """
313
314 def _GenerateCRCTable():
315 """Generate a CRC-32 table.
316
317 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
318 internal keys. We noticed that a direct implementation is faster than
319 relying on binascii.crc32().
320 """
321 poly = 0xedb88320
322 table = [0] * 256
323 for i in range(256):
324 crc = i
325 for j in range(8):
326 if crc & 1:
327 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
328 else:
329 crc = ((crc >> 1) & 0x7FFFFFFF)
330 table[i] = crc
331 return table
332 crctable = _GenerateCRCTable()
333
334 def _crc32(self, ch, crc):
335 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000336 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000337
338 def __init__(self, pwd):
339 self.key0 = 305419896
340 self.key1 = 591751049
341 self.key2 = 878082192
342 for p in pwd:
343 self._UpdateKeys(p)
344
345 def _UpdateKeys(self, c):
346 self.key0 = self._crc32(c, self.key0)
347 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
348 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000349 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000350
351 def __call__(self, c):
352 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000353 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000354 k = self.key2 | 2
355 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000356 self._UpdateKeys(c)
357 return c
358
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap 'fileobj' for reading the member described by 'zipinfo'.

        fileobj -- file object the raw member data is read from
        zipinfo -- supplies compress_type, compress_size and filename
        decrypt -- optional callable applied to every raw byte read
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = b''            # read from file, not yet processed
        self.readbuffer = b''           # processed, not yet returned by read()
        self.linebuffer = b''           # returned by read(), held for readline()
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = (b"\n", )
        self.lastdiscard = b''          # separator removed by the last readline()

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable recognition of "\\r\\n" and "\\r" as line ends."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = (b"\n", )
        if self.univ_newlines:
            self.nlSeps = (b"\r\n", b"\r", b"\n")

    def __iter__(self):
        return self

    def __next__(self):
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object as closed."""
        self.closed = True

    def _checkfornewline(self):
        """Return (position, length) of the first separator in linebuffer.

        Returns (-1, -1) when the buffer holds no separator.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            # (Slice rather than index: indexing bytes yields an int.)
            if self.lastdiscard == b'\r' and self.linebuffer[0:1] == b'\n':
                self.linebuffer = self.linebuffer[1:]
                # The pair is now complete; do not swallow another \n.
                self.lastdiscard = b''

            # Pick the separator that occurs earliest.  nlSeps lists "\r\n"
            # before "\r", so the strict comparison keeps the longer one when
            # both start at the same position.
            for sep in self.nlSeps:
                pos = self.linebuffer.find(sep)
                if pos >= 0 and (nl < 0 or pos < nl):
                    nl, nllen = pos, len(sep)

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.

        A complete line is returned with b"\\n" as its terminator whatever
        separator was found; a final incomplete line or a size-limited chunk
        is returned without one.
        """
        if size < 0:
            size = sys.maxsize
        elif size == 0:
            return b''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            if size < nl:
                # The limit falls before the separator: hand back only the
                # first 'size' bytes and keep the rest of the line buffered.
                s = self.linebuffer[:size]
                self.linebuffer = self.linebuffer[size:]
                return s
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = b''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + b"\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to 'size' bytes of member data.

        With size None or negative, everything that remains is returned.
        """
        # a negative size means "no limit", exactly like None
        if size is not None and size < 0:
            size = None

        # act like file() obj and return empty bytes if size is 0
        if size == 0:
            return b''

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            data = self.fileobj.read(bytesToRead)
            self.bytes_read += len(data)
            self.rawbuffer += data

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = b''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = bytes(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type == ZIP_DEFLATED:
                    newdata = self.dc.decompress(newdata)
                    self.rawbuffer = self.dc.unconsumed_tail
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            data = self.readbuffer
            self.readbuffer = b''
        else:
            data = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000556
557
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000558class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000559 """ Class with methods to open, read, write, close, list zip files.
560
    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000562
Fred Drake3d9091e2001-03-26 15:49:24 +0000563 file: Either the path to the file, or a file-like object.
564 If it is a path, the file will be opened and closed by ZipFile.
565 mode: The mode can be either read "r", write "w" or append "a".
566 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000567 allowZip64: if True ZipFile will create files with ZIP64 extensions when
568 needed, otherwise it will raise an exception when this would
569 be necessary.
570
Fred Drake3d9091e2001-03-26 15:49:24 +0000571 """
Fred Drake484d7352000-10-02 21:14:52 +0000572
Fred Drake90eac282001-02-28 05:29:34 +0000573 fp = None # Set here since __del__ checks it
574
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000575 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000576 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000577 self._allowZip64 = allowZip64
578 self._didModify = False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000579 if compression == ZIP_STORED:
580 pass
581 elif compression == ZIP_DEFLATED:
582 if not zlib:
583 raise RuntimeError,\
Fred Drake5db246d2000-09-29 20:44:48 +0000584 "Compression requires the (missing) zlib module"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000585 else:
586 raise RuntimeError, "That compression method is not supported"
Tim Peterse1190062001-01-15 03:34:38 +0000587 self.debug = 0 # Level of printing: 0 through 3
588 self.NameToInfo = {} # Find file info given name
589 self.filelist = [] # List of ZipInfo instances for archive
590 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000591 self.mode = key = mode.replace('b', '')[0]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000592 self.pwd = None
Tim Petersa19a1682001-03-29 04:36:09 +0000593
Fred Drake3d9091e2001-03-26 15:49:24 +0000594 # Check if we were passed a file-like object
Walter Dörwald65230a22002-06-03 15:58:32 +0000595 if isinstance(file, basestring):
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000596 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +0000597 self._filePassed = 0
598 self.filename = file
599 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Thomas Wouterscf297e42007-02-23 15:07:44 +0000600 try:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000601 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000602 except IOError:
603 if mode == 'a':
604 mode = key = 'w'
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000605 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000606 else:
607 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000608 else:
609 self._filePassed = 1
610 self.fp = file
611 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000612
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000613 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000614 self._GetContents()
615 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000616 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000617 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000618 try: # See if file is a zip file
619 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000620 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000621 self.fp.seek(self.start_dir, 0)
622 except BadZipfile: # file is not a zip file, just append
623 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000624 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000625 if not self._filePassed:
626 self.fp.close()
627 self.fp = None
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000628 raise RuntimeError, 'Mode must be "r", "w" or "a"'
629
630 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000631 """Read the directory, making sure we close the file if the format
632 is bad."""
633 try:
634 self._RealGetContents()
635 except BadZipfile:
636 if not self._filePassed:
637 self.fp.close()
638 self.fp = None
639 raise
640
641 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000642 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000643 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000644 endrec = _EndRecData(fp)
645 if not endrec:
646 raise BadZipfile, "File is not a zip file"
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000647 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000648 print(endrec)
Tim Peterse1190062001-01-15 03:34:38 +0000649 size_cd = endrec[5] # bytes in central directory
650 offset_cd = endrec[6] # offset of central directory
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000651 self.comment = endrec[8] # archive comment
652 # endrec[9] is the offset of the "End of Central Dir" record
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000653 if endrec[9] > ZIP64_LIMIT:
654 x = endrec[9] - size_cd - 56 - 20
655 else:
656 x = endrec[9] - size_cd
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000657 # "concat" is zero, unless zip was concatenated to another file
658 concat = x - offset_cd
659 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000660 print("given, inferred, offset", offset_cd, x, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000661 # self.start_dir: Position of start of central directory
662 self.start_dir = offset_cd + concat
663 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000664 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000665 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000666 total = 0
667 while total < size_cd:
668 centdir = fp.read(46)
669 total = total + 46
670 if centdir[0:4] != stringCentralDir:
671 raise BadZipfile, "Bad magic number for central directory"
672 centdir = struct.unpack(structCentralDir, centdir)
673 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000674 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +0000675 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000676 # Create ZipInfo instance to store file information
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000677 x = ZipInfo(str(filename))
Fred Drake3e038e52001-02-28 17:56:26 +0000678 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
679 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
680 total = (total + centdir[_CD_FILENAME_LENGTH]
681 + centdir[_CD_EXTRA_FIELD_LENGTH]
682 + centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000683 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000684 (x.create_version, x.create_system, x.extract_version, x.reserved,
685 x.flag_bits, x.compress_type, t, d,
686 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
687 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
688 # Convert date/time code to (year, month, day, hour, min, sec)
689 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000690 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000691
692 x._decodeExtra()
693 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000694 self.filelist.append(x)
695 self.NameToInfo[x.filename] = x
696 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000697 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000698
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000699
700 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000701 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000702 l = []
703 for data in self.filelist:
704 l.append(data.filename)
705 return l
706
707 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000708 """Return a list of class ZipInfo instances for files in the
709 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000710 return self.filelist
711
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    A header line is printed first, then one line per entry of
    self.filelist giving its filename, modification time and file_size.

    file -- passed straight through as print()'s ``file`` argument.
    """
    print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
          file=file)
    for zinfo in self.filelist:
        # date_time may be a time.struct_time (nine fields) rather than a
        # plain 6-tuple; formatting it whole would raise TypeError, so keep
        # only (year, month, day, hour, minute, second).
        date = "%d-%02d-%02d %02d:%02d:%02d" % tuple(zinfo.date_time[:6])
        print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
              file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000720
721 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000722 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000723 for zinfo in self.filelist:
724 try:
Tim Peterse1190062001-01-15 03:34:38 +0000725 self.read(zinfo.filename) # Check CRC-32
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000726 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000727 return zinfo.filename
728
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000729
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    The lookup is done in self.NameToInfo.

    Raises KeyError, with a message naming the missing member, when the
    archive has no entry called 'name'.
    """
    try:
        return self.NameToInfo[name]
    except KeyError:
        # Same exception type as a plain dict lookup, but say what was
        # being looked up and where.
        raise KeyError("There is no item named %r in the archive" % name)
733
def setpassword(self, pwd):
    """Set default password for encrypted files.

    pwd -- the password, as a bytes object; it is stored on self.pwd.

    Raises TypeError if pwd is not a bytes instance.  (An explicit raise is
    used rather than ``assert`` so the check survives ``python -O``.)
    """
    if not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd
738
def read(self, name, pwd=None):
    """Return the full contents of the archive member 'name'.

    The member is opened in mode "r" through self.open(), forwarding
    'pwd', and the result of a single read() call on it is returned.
    """
    member_file = self.open(name, "r", pwd)
    return member_file.read()
742
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name -- archive member to open; resolved with self.getinfo().
    mode -- "r", "U" or "rU"; when it contains "U" universal newlines are
            switched on for the returned object.
    pwd  -- password for an encrypted member; when false, self.pwd is
            used instead.

    Raises RuntimeError for an unsupported mode, a closed archive, a
    missing password or a failed password check, and BadZipfile when the
    local file header has a bad magic number or a name that differs from
    the central directory.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
        owns_file = False
    else:
        zef_file = io.open(self.filename, 'rb')
        owns_file = True

    try:
        # Get info object for name
        zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(30)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename.encode("utf-8"):
            raise BadZipfile(
                'File name in directory %r and header %r differ.' % (
                    zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  and is used to check the correctness of the password.
            enc_header = zef_file.read(12)
            h = list(map(zd, enc_header[0:12]))
            if h[11] != ((zinfo.CRC>>24) & 255):
                raise RuntimeError("Bad password for file %s" % name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)
    except BaseException:
        # Do not leak the handle we opened ourselves when the member cannot
        # be opened; a caller-supplied file object is left alone.
        if owns_file:
            zef_file.close()
        raise

    # set universal newlines on ZipExtFile if necessary
    if "U" in mode:
        zef.set_univ_newlines(True)
    return zef
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000810
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    zinfo -- the ZipInfo describing the member about to be written.

    A duplicate member name only produces a printed warning, and only when
    self.debug is set.  Raises RuntimeError when the archive is not in mode
    "w"/"a", is already closed, or zinfo asks for an unavailable or
    unsupported compression method.  Raises LargeZipFile when the member's
    file_size or header_offset exceeds ZIP64_LIMIT and self._allowZip64 is
    false.
    """
    if zinfo.filename in self.NameToInfo:
        if self.debug:      # Warning for duplicate names
            print("Duplicate name:", zinfo.filename)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    if zinfo.compress_type == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError(
            "That compression method is not supported")
    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000833
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    filename      -- path of the file on disk to add.
    arcname       -- name to store in the archive; defaults to filename.
                     Any drive prefix and leading path separators are
                     removed and the path is normalised.
    compress_type -- compression method for this member; defaults to
                     self.compression.

    The local header is first written with zero CRC and sizes; once the
    data has been streamed out, those three fields are patched in place.
    Errors detected by self._writecheck() propagate to the caller.
    """
    st = os.stat(filename)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    # The real size is set here only so _writecheck() can apply the ZIP64
    # limit; it is reset to 0 before the provisional header is written.
    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True
    fp = io.open(filename, "rb")
    try:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None
        while True:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = binascii.crc32(buf, CRC)
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    finally:
        # Release the source file even when reading or writing fails.
        fp.close()
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    # Masking to 32 bits and packing unsigned yields the same four bytes
    # for a negative (signed) CRC and also accepts an unsigned one, which
    # the signed "l" format rejects above 0x7fffffff.
    self.fp.write(struct.pack("<LLL", zinfo.CRC & 0xffffffff,
                              zinfo.compress_size, zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
898
def writestr(self, zinfo_or_arcname, bytes):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo is created with the current local
    time and self.compression.  If bit 0x08 of the member's flag_bits is
    set, the CRC and sizes are additionally written after the file data.
    Errors detected by self._writecheck() propagate to the caller.
    """
    if not isinstance(zinfo_or_arcname, ZipInfo):
        # Keep only (year, month, day, hour, minute, second): localtime()
        # returns a nine-field struct_time, which breaks code that formats
        # date_time as exactly six values.
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
    else:
        zinfo = zinfo_or_arcname
    zinfo.file_size = len(bytes)            # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
        bytes = co.compress(bytes) + co.flush()
        zinfo.compress_size = len(bytes)    # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    # header_offset was recorded above and nothing has been written since,
    # so it does not need to be computed a second time.
    self.fp.write(zinfo.FileHeader())
    self.fp.write(bytes)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data.  The CRC is masked
        # and packed unsigned: same bytes for a signed value, and no
        # struct.error for an unsigned one above 0x7fffffff.
        self.fp.write(struct.pack("<LLL", zinfo.CRC & 0xffffffff,
                                  zinfo.compress_size, zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
931
def __del__(self):
    """Finalizer: invoke close() in case the user never did."""
    finish = self.close
    finish()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000935
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing when self.fp is already None.  When the archive was
    opened in mode "w" or "a" and has been modified, the central directory
    and the end-of-archive record(s) are written first.  Afterwards
    self.fp is set to None, and the underlying file is closed unless it
    was supplied by the caller (self._filePassed); this happens even if
    writing the ending records raises.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff #-1
                    compress_size = 0xffffffff #-1
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = -1  # struct "l" format:  32 one bits
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<hh' + 'q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                # Encode once and record the length of what is actually
                # written: for non-ASCII names the UTF-8 byte count differs
                # from the character count.
                filename = zinfo.filename.encode("utf-8")
                centdir = struct.pack(structCentralDir,
                    stringCentralDir, create_version,
                    zinfo.create_system, extract_version, zinfo.reserved,
                    zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                    zinfo.CRC, compress_size, file_size,
                    len(filename), len(extra_data), len(zinfo.comment),
                    0, zinfo.internal_attr, zinfo.external_attr,
                    header_offset)
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            if pos1 > ZIP64_LIMIT:
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)

                # The classic record carries -1 as the directory offset;
                # the real offset is in the ZIP64 record written above.
                endrec = struct.pack(structEndArchive, stringEndArchive,
                                     0, 0, count, count, pos2 - pos1, -1, 0)
                self.fp.write(endrec)

            else:
                endrec = struct.pack(structEndArchive, stringEndArchive,
                                     0, 0, count, count, pos2 - pos1, pos1, 0)
                self.fp.write(endrec)
            self.fp.flush()
    finally:
        # Always drop our reference so a later close() or __del__ does not
        # try to write the ending records a second time.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1021
1022
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        basename -- archive path prefix under which the modules are stored.

        Raises RuntimeError if pathname is a file not ending in ".py".
        """
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                # __init__.py has just been added; skip it in the scan below
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).
        When basename is non-empty the archive name is prefixed with
        "basename/".

        An up-to-date .pyo file is preferred; otherwise the .pyc is used,
        and is (re)compiled first when missing or older than the .py.  A
        compile error is printed, not raised.
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        elif not os.path.isfile(file_pyc) or \
                os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
            import py_compile
            if self.debug:
                print("Compiling", file_py)
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError as err:
                print(err.msg)
            fname = file_pyc
        else:
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001120
1121
def main(args = None):
    """Command-line entry point: list, test, extract or create a zipfile.

    args -- list of command-line arguments; defaults to sys.argv[1:].

    Prints the usage text and exits with status 1 when the option is
    unknown or the argument count does not fit the chosen option.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        zf.printdir()
        zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        # testzip() returns the name of the first bad member (or None);
        # report it instead of silently discarding the result.
        badfile = zf.testzip()
        zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % badfile)
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        out = args[2]
        # NOTE(review): member names are joined onto the target directory
        # unchecked, so an archive entry containing ".." or an absolute
        # path can be written outside 'out'.  Only extract trusted archives.
        for path in zf.namelist():
            if path.startswith('./'):
                tgt = os.path.join(out, path[2:])
            else:
                tgt = os.path.join(out, path)

            if path.endswith('/'):
                # Directory entry: create it; there is no file data to
                # write, and opening a directory path for writing fails.
                if not os.path.exists(tgt):
                    os.makedirs(tgt)
                continue

            tgtdir = os.path.dirname(tgt)
            if not os.path.exists(tgtdir):
                os.makedirs(tgtdir)
            fp = io.open(tgt, 'wb')
            try:
                fp.write(zf.read(path))
            finally:
                fp.close()
        zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; other kinds of
            # filesystem entries are ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        for src in args[2:]:
            addToZip(zf, src, os.path.basename(src))

        zf.close()

if __name__ == "__main__":
    main()