blob: e1fdc7fa812a3521fb9065e3c4433eaa9b68aa11 [file] [log] [blame]
"""
Read and write ZIP files.

XXX references to utf-8 need further investigation.
"""
Martin v. Löwis00756902006-02-05 17:09:41 +00006import struct, os, time, sys
Guido van Rossum68937b42007-05-18 00:51:22 +00007import binascii, io
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
9try:
Tim Peterse1190062001-01-15 03:34:38 +000010 import zlib # We may need its compression method
Guido van Rossum9c673f32001-04-10 15:37:12 +000011except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000012 zlib = None
13
# Public names exported by "from zipfile import *".
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000016
class BadZipfile(Exception):
    """Raised when a file is not a ZIP archive or its records are invalid."""


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


error = BadZipfile      # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000028
# Largest value of a signed 32-bit integer; the code in this module switches
# to the ZIP64 records when a size or offset exceeds it.
ZIP64_LIMIT = (1 << 31) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers.
# NOTE(review): several CRC/offset fields below use the signed "l" code;
# confirm against the writers before switching them to unsigned "L".
structEndArchive = "<4s4H2lH"     # 8 packed items, end of archive, 22 bytes
stringEndArchive = b"PK\005\006"  # magic number for end of archive record
structCentralDir = "<4s4B4HlLL5HLl"  # 19 items, central directory, 46 bytes
stringCentralDir = b"PK\001\002"  # magic number for central directory
structFileHeader = "<4s2B4HlLL2H"  # 12 items, file header record, 30 bytes
stringFileHeader = b"PK\003\004"  # magic number for file header
structEndArchive64Locator = "<4slql"  # 4 items, locate Zip64 header, 20 bytes
stringEndArchive64Locator = b"PK\x06\x07"  # magic token for locator header
structEndArchive64 = "<4sqhhllqqqq"  # 10 items, end of archive (Zip64), 56 bytes
stringEndArchive64 = b"PK\x06\x06"  # magic token for Zip64 header


# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4                  # is this meaningful?
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# indexes of entries in the local file header structure
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2                  # is this meaningful?
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
83
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Returns True when an "End of Central Directory" record is found in the
    file named by filename, and False when it is not or when the file
    cannot be opened or read (IOError).
    """
    try:
        # The with-block closes the file even if the record lookup raises.
        with io.open(filename, "rb") as fpin:
            endrec = _EndRecData(fpin)
    except IOError:
        return False
    return bool(endrec)     # a non-empty record means the magic number matched
Guido van Rossum32abe6f2000-03-31 17:30:02 +000095
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is the open archive, offset is the position of the classic
    end-of-archive record relative to the end of the file (a negative
    number) and endrec is the list built from that record.  endrec is
    returned unchanged when no ZIP64 locator/record is found in front of it.
    Raises BadZipfile for archives spanning several disks.
    """
    locatorSize = struct.calcsize(structEndArchive64Locator)
    try:
        fpin.seek(offset - locatorSize, 2)
    except IOError:
        # File too small to hold a locator in front of the end record.
        return endrec
    data = fpin.read(locatorSize)
    if len(data) != locatorSize:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    endArchiveSize = struct.calcsize(structEndArchive64)
    try:
        fpin.seek(offset - locatorSize - endArchiveSize, 2)
    except IOError:
        return endrec
    data = fpin.read(endArchiveSize)
    if len(data) != endArchiveSize:
        return endrec
    (sig, sz, create_version, read_version, disk_num, disk_dir,
     dircount, dircount2, dirsize, diroffset) = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[1] = disk_num
    endrec[2] = disk_dir
    endrec[3] = dircount
    endrec[4] = dircount2
    endrec[5] = dirsize
    endrec[6] = diroffset
    return endrec
128
129
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the eight fields unpacked from the ZIP "End of
    central dir" record, followed by the archive comment (bytes) and, as
    the tenth item, the file seek offset of this record.  None is returned
    when no valid record is found.
    """
    recSize = struct.calcsize(structEndArchive)     # 22 bytes
    fpin.seek(0, 2)
    filesize = fpin.tell()
    if filesize < recSize:
        # Too short to contain even an empty archive's end record.
        return None

    fpin.seek(filesize - recSize, 0)    # Assume no archive comment.
    data = fpin.read()
    # The file is opened in binary mode, so compare against bytes.
    if (len(data) == recSize and data[0:4] == stringEndArchive
            and data[-2:] == b"\000\000"):
        endrec = list(struct.unpack(structEndArchive, data))
        endrec.append(b"")                  # Append the archive comment
        endrec.append(filesize - recSize)   # Append the record start offset
        if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
            return _EndRecData64(fpin, -recSize, endrec)
        return endrec

    # Search the last END_BLOCK bytes of the file for the record signature.
    # The comment is appended to the ZIP file and has a 16 bit length.
    # So the comment may be up to 64K long.  We limit the search for the
    # signature to a few Kbytes at the end of the file for efficiency.
    # also, the signature must not appear in the comment.
    END_BLOCK = min(filesize, 1024 * 4)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:      # Correct signature string was found
        recData = data[start:start + recSize]
        if len(recData) != recSize:
            # Signature too close to the end of file for a full record.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start + recSize:]
        if endrec[7] == len(comment):   # Comment length checks out
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(filesize - END_BLOCK + start)
            if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
                # The record sits (start - END_BLOCK) bytes from the end.
                return _EndRecData64(fpin, -END_BLOCK + start, endrec)
            return endrec
    return None         # Error, return None
167
Fred Drake484d7352000-10-02 21:14:52 +0000168
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
    )

    def __init__(self, filename="NoName", date_time=(1980, 1, 1, 0, 0, 0)):
        """Create the entry for filename (str) with standard default values.

        date_time is a (year, month, day, hour, min, sec) sequence.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED  # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0      # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3      # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as bytes.

        The result is the packed local file header followed by the UTF-8
        encoded file name and the extra field.  When a size exceeds
        ZIP64_LIMIT a ZIP64 extra record is appended and the version fields
        of this object are raised to at least 45.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<hhqq'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt) - 4, file_size, compress_size)
            file_size = 0xffffffff      # -1
            compress_size = 0xffffffff  # -1
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        # Encode first: the header stores the length of the name in bytes,
        # which differs from len(str) for non-ASCII names.
        filename = self.filename.encode("utf-8")
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, self.flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _decodeExtra(self):
        """Decode the ZIP64 record (header id 1) of the extra field.

        Replaces file_size, compress_size and header_offset, in that order,
        by the 64-bit values of the record for each attribute currently
        holding the placeholder -1 / 0xFFFFFFFF.  Raises RuntimeError when
        the ZIP64 record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Each record is a 4-byte (id, length) header followed by its data;
        # trailing bytes too short for a header are ignored.
        while len(extra) >= 4:
            # Unsigned fields: a signed length could go negative and keep
            # the loop from ever advancing.
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<qqq', extra[4:28])
                elif ln == 16:
                    counts = unpack('<qq', extra[4:20])
                elif ln == 8:
                    counts = unpack('<q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size == -1 or self.file_size == 0xFFFFFFFF:
                    self.file_size = counts[idx]
                    idx += 1

                if (self.compress_size == -1
                        or self.compress_size == 0xFFFFFFFF):
                    self.compress_size = counts[idx]
                    idx += 1

                if (self.header_offset == -1
                        or self.header_offset == 0xffffffff):
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln + 4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000299
300
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    for low-level security.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = [0] * 256
        for i in range(256):
            crc = i
            for j in range(8):
                if crc & 1:
                    crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
                else:
                    crc = ((crc >> 1) & 0x7FFFFFFF)
            table[i] = crc
        return table
    # Built once, while the class body executes, and shared by all instances.
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]

    def __init__(self, pwd):
        """Initialise the three keys and mix in every item of pwd.

        Iterating pwd must yield integers (as a bytes object does).
        """
        self.key0 = 305419896
        self.key1 = 591751049
        self.key2 = 878082192
        for p in pwd:
            self._UpdateKeys(p)

    def _UpdateKeys(self, c):
        """Advance the three keys with the integer byte value c."""
        self.key0 = self._crc32(c, self.key0)
        self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
        self.key1 = (self.key1 * 134775813 + 1) & 4294967295
        self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        c = c ^ (((k * (k ^ 1)) >> 8) & 255)
        # The keys are updated with the decrypted value, not the input.
        self._UpdateKeys(c)
        return c
358
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap fileobj, from which the member described by zipinfo is read.

        decrypt, when given, is called with each raw byte value and must
        return the decrypted byte value.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0         # raw bytes pulled from fileobj so far
        self.rawbuffer = b''        # raw data not yet decompressed
        self.readbuffer = b''       # decoded data not yet handed to read()
        self.linebuffer = b''       # data held back by readline()
        # NOTE(review): nothing in this class sets eof to True, so the
        # decompressor flush in read() never runs -- confirm intent.
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = (b"\n", )
        self.lastdiscard = b''      # separator removed by the last readline()

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable recognition of "\\r\\n" and "\\r" line ends."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = (b"\n", )
        if self.univ_newlines:
            self.nlSeps = (b"\r\n", b"\r", b"\n")

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next line; raise StopIteration at end of data."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object as closed; the underlying file is not touched."""
        self.closed = True

    def _checkfornewline(self):
        """Return (position, length) of the first separator in linebuffer.

        Returns (-1, -1) when the buffer contains no separator.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[:1]) == (b'\r', b'\n'):
                self.linebuffer = self.linebuffer[1:]

            # Pick the separator that occurs EARLIEST in the buffer, not
            # the first separator type that occurs anywhere.  On a tie the
            # earlier entry of nlSeps wins, so "\r\n" beats "\r".
            for sep in self.nlSeps:
                pos = self.linebuffer.find(sep)
                if pos >= 0 and (nl < 0 or pos < nl):
                    nl, nllen = pos, len(sep)

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.
        """
        if size < 0:
            size = sys.maxsize      # sys.maxint does not exist in Python 3
        elif size == 0:
            return b''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            nl = min(nl, size)
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = b''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + b"\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to size bytes of member data; all of it if size is
        None or negative.  Returns b'' when size is 0.
        """
        # act like file obj and return empty string if size is 0
        if size == 0:
            return b''
        # A negative size means "no limit"; without this the final slice
        # readbuffer[:size] would silently drop trailing bytes.
        if size is not None and size < 0:
            size = None

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            data = self.fileobj.read(bytesToRead)
            self.bytes_read += len(data)
            self.rawbuffer += data

        # handle contents of raw buffer
        if self.rawbuffer:
            newdata = self.rawbuffer
            self.rawbuffer = b''

            # decrypt new data if we were given an object to handle that
            if newdata and self.decrypter is not None:
                newdata = bytes(map(self.decrypter, newdata))

            # decompress newly read data if necessary
            if newdata and self.compress_type == ZIP_DEFLATED:
                newdata = self.dc.decompress(newdata)
                self.rawbuffer = self.dc.unconsumed_tail
                if self.eof and len(self.rawbuffer) == 0:
                    # we're out of raw bytes (both from the file and
                    # the local buffer); flush just to make sure the
                    # decompressor is done
                    newdata += self.dc.flush()
                    # prevent decompressor from being used again
                    self.dc = None

            self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            data = self.readbuffer
            self.readbuffer = b''
        else:
            data = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000556
557
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000558class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000559 """ Class with methods to open, read, write, close, list zip files.
560
    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000562
Fred Drake3d9091e2001-03-26 15:49:24 +0000563 file: Either the path to the file, or a file-like object.
564 If it is a path, the file will be opened and closed by ZipFile.
565 mode: The mode can be either read "r", write "w" or append "a".
566 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000567 allowZip64: if True ZipFile will create files with ZIP64 extensions when
568 needed, otherwise it will raise an exception when this would
569 be necessary.
570
Fred Drake3d9091e2001-03-26 15:49:24 +0000571 """
Fred Drake484d7352000-10-02 21:14:52 +0000572
Fred Drake90eac282001-02-28 05:29:34 +0000573 fp = None # Set here since __del__ checks it
574
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file is either a path (str), which is opened here, or a file-like
    object, which is used as given.  Raises RuntimeError for an unknown
    mode or compression method, or when ZIP_DEFLATED is requested and zlib
    is not available.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
    else:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None

    # Check if we were passed a file-like object
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = io.open(file, modeDict[mode])
        except IOError:
            if mode == 'a':
                # Appending to a file that cannot be opened for update:
                # fall back to creating it.
                mode = key = 'w'
                self.fp = io.open(file, modeDict[mode])
            else:
                raise
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    # mode was validated above, so key is always one of "r", "w" or "a".
    if key == 'r':
        self._GetContents()
    elif key == 'w':
        pass
    elif key == 'a':
        try:                        # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:          # file is not a zip file, just append
            self.fp.seek(0, 2)
633
def _GetContents(self):
    """Read the directory, making sure we close the file if the format
    is bad.

    The file is only closed when this object opened it itself; BadZipfile
    is re-raised in either case.
    """
    try:
        self._RealGetContents()
    except BadZipfile:
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise
644
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Fills self.filelist and self.NameToInfo with one ZipInfo per central
    directory record and sets self.comment and self.start_dir.  Raises
    BadZipfile when the end record or a central directory record is
    missing or malformed.
    """
    fp = self.fp
    endrec = _EndRecData(fp)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[5]             # bytes in central directory
    offset_cd = endrec[6]           # offset of central directory
    self.comment = endrec[8]        # archive comment
    # endrec[9] is the offset of the "End of Central Dir" record
    if endrec[9] > ZIP64_LIMIT:
        # Skip the ZIP64 end record (56) and its locator (20) as well.
        inferred = endrec[9] - size_cd - 56 - 20
    else:
        inferred = endrec[9] - size_cd
    # "concat" is zero, unless zip was concatenated to another file
    concat = inferred - offset_cd
    if self.debug > 2:
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(46)
        total = total + 46
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != 46:
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # The name is stored as bytes.  FileHeader() writes UTF-8, so try
        # that first; cp437 is the legacy ZIP encoding and decodes any byte.
        try:
            filename = filename.decode("utf-8")
        except UnicodeDecodeError:
            filename = filename.decode("cp437")
        # Create ZipInfo instance to store file information
        zinfo = ZipInfo(filename)
        zinfo.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        zinfo.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        total = (total + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])
        zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (zinfo.create_version, zinfo.create_system, zinfo.extract_version,
         zinfo.reserved, zinfo.flag_bits, zinfo.compress_type, t, d,
         zinfo.CRC, zinfo.compress_size, zinfo.file_size) = centdir[1:12]
        zinfo.volume, zinfo.internal_attr, zinfo.external_attr = \
            centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        zinfo.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        zinfo._decodeExtra()
        zinfo.header_offset = zinfo.header_offset + concat
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000702
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000703
def namelist(self):
    """Return a list of file names in the archive.

    A new list is built on every call, in the order of self.filelist.
    """
    return [zinfo.filename for zinfo in self.filelist]
710
def infolist(self):
    """Return the list of ZipInfo instances for the archive members.

    The object returned is self.filelist itself, not a copy.
    """
    members = self.filelist
    return members
715
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One heading line is printed, then one line per entry of
    self.filelist giving its name, modification time and size.  'file'
    is passed straight to print(), so None means standard output.
    """
    heading = ("File Name", "Modified ", "Size")
    print("%-46s %19s %12s" % heading, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000724
725 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000726 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000727 for zinfo in self.filelist:
728 try:
Tim Peterse1190062001-01-15 03:34:38 +0000729 self.read(zinfo.filename) # Check CRC-32
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000730 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000731 return zinfo.filename
732
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000733
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError when self.NameToInfo has no entry for that name.
    """
    entry = self.NameToInfo.get(name)
    if entry is not None:
        return entry

    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000742
def setpassword(self, pwd):
    """Set default password for encrypted files.

    'pwd' must be a bytes object; it is stored as self.pwd.

    Raises TypeError if 'pwd' is not bytes.
    """
    # Explicit check instead of 'assert': assertions are stripped when
    # Python runs with -O, which would let a non-bytes password through.
    if not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd
747
def read(self, name, pwd=None):
    """Return the whole contents of archive member 'name'.

    The member is opened through self.open() in mode "r" with the given
    'pwd', and everything its read() returns is handed back.
    """
    member = self.open(name, "r", pwd)
    return member.read()
751
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    'mode' must be "r", "U" or "rU"; when it contains "U", universal
    newlines are switched on for the returned object.  'pwd' is the
    password for an encrypted member; when it is empty, self.pwd is
    used instead.

    Raises RuntimeError for an unsupported mode, an already closed
    archive, or a missing or wrong password; KeyError (from getinfo())
    for an unknown name; BadZipfile when the member's local file header
    has a bad magic number or a name differing from the directory.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Get info object for name
        zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(30)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename.encode("utf-8"):
            raise BadZipfile(
                'File name in directory %r and header %r differ.' % (
                    zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption
            # header used to strengthen the algorithm. The first 11 bytes
            # are completely random, while the 12th contains the MSB of
            # the CRC, and is used to check the correctness of the password.
            enc_header = zef_file.read(12)
            h = list(map(zd, enc_header[0:12]))
            if h[11] != ((zinfo.CRC >> 24) & 255):
                raise RuntimeError("Bad password for file %s" % name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
        return zef
    except BaseException:
        # Do not leak the file we opened ourselves; a file object handed
        # to the constructor belongs to the caller and is left alone.
        if not self._filePassed:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000819
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    A duplicate member name only produces a message on standard output,
    and only when self.debug is set.

    Raises RuntimeError when the archive mode is not "w" or "a", the
    archive is already closed, or the compression method is unsupported
    or needs the missing zlib module.  Raises LargeZipFile when the
    file size or header offset exceeds ZIP64_LIMIT while ZIP64
    extensions are disabled.
    """
    if zinfo.filename in self.NameToInfo:
        if self.debug:      # Warning for duplicate names
            print("Duplicate name:", zinfo.filename)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    if zinfo.compress_type == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError(
            "That compression method is not supported")
    if zinfo.file_size > ZIP64_LIMIT:
        if not self._allowZip64:
            raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT:
        if not self._allowZip64:
            raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000842
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    'arcname' defaults to 'filename'; its drive letter and leading path
    separators are removed.  'compress_type' overrides self.compression
    for this member.  The local header is first written with zeroed CRC
    and sizes, then patched once the data has been streamed.

    Raises RuntimeError if the archive is already closed, plus whatever
    _writecheck() raises for the new entry.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    # The real size is set here so _writecheck() can apply the ZIP64 limit
    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True
    fp = io.open(filename, "rb")
    try:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None
        while True:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = binascii.crc32(buf, CRC)
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    finally:
        # Close the source file even when reading or writing fails
        fp.close()
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    # Mask the CRC to 32 unsigned bits: the bytes are the same as for a
    # signed value, and an unsigned crc32() result >= 2**31 no longer
    # overflows the struct format.
    self.fp.write(struct.pack("<LLL", zinfo.CRC & 0xffffffff,
                              zinfo.compress_size, zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
911
def writestr(self, zinfo_or_arcname, bytes):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo is created with the current
    local time and the archive's default compression.

    Raises RuntimeError if the archive is already closed, plus whatever
    _writecheck() raises for the entry.
    """
    # Fail fast on a closed archive, as write() does
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time()))
        zinfo.compress_type = self.compression
    else:
        zinfo = zinfo_or_arcname

    data = bytes                            # keep the builtin name usable
    zinfo.file_size = len(data)             # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = binascii.crc32(data)        # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
        data = co.compress(data) + co.flush()
        zinfo.compress_size = len(data)     # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data.  The CRC is
        # masked to 32 unsigned bits so that an unsigned crc32() result
        # >= 2**31 does not overflow the struct format; the bytes written
        # for a signed value are unchanged.
        self.fp.write(struct.pack("<LLL", zinfo.CRC & 0xffffffff,
                                  zinfo.compress_size, zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
949
def __del__(self):
    """Finalizer: invoke close() in case the user never did."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000953
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing when self.fp is already None.  The central directory
    and end-of-archive record(s) are written only if the archive was
    opened in mode "w" or "a" and was modified.  The underlying file is
    closed unless it was handed to the constructor (self._filePassed);
    self.fp is reset to None either way.
    """
    if self.fp is None:
        return

    if self.mode in ("w", "a") and self._didModify: # write ending records
        count = 0
        pos1 = self.fp.tell()
        for zinfo in self.filelist:         # write central directory
            count = count + 1
            dt = zinfo.date_time
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            extra = []
            if zinfo.file_size > ZIP64_LIMIT \
                    or zinfo.compress_size > ZIP64_LIMIT:
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                file_size = 0xffffffff #-1
                compress_size = 0xffffffff #-1
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = -1  # struct "l" format:  32 one bits
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            if extra:
                # Append a ZIP64 field to the extra's
                extra_data = struct.pack(
                    '<hh' + 'q'*len(extra),
                    1, 8*len(extra), *extra) + extra_data

                extract_version = max(45, zinfo.extract_version)
                create_version = max(45, zinfo.create_version)
            else:
                extract_version = zinfo.extract_version
                create_version = zinfo.create_version

            # Encode the name once: the length stored in the record must
            # be the number of bytes written, which differs from the
            # character count for non-ASCII names.
            filename = zinfo.filename.encode("utf-8")
            centdir = struct.pack(structCentralDir,
                stringCentralDir, create_version,
                zinfo.create_system, extract_version, zinfo.reserved,
                zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                zinfo.CRC, compress_size, file_size,
                len(filename), len(extra_data), len(zinfo.comment),
                0, zinfo.internal_attr, zinfo.external_attr,
                header_offset)
            self.fp.write(centdir)
            self.fp.write(filename)
            self.fp.write(extra_data)
            self.fp.write(zinfo.comment)

        pos2 = self.fp.tell()
        # Write end-of-zip-archive record
        if pos1 > ZIP64_LIMIT:
            # Need to write the ZIP64 end-of-archive records
            zip64endrec = struct.pack(
                structEndArchive64, stringEndArchive64,
                44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
            self.fp.write(zip64endrec)

            zip64locrec = struct.pack(
                structEndArchive64Locator,
                stringEndArchive64Locator, 0, pos2, 1)
            self.fp.write(zip64locrec)

            # The classic record carries -1 as the directory offset;
            # the real offset is in the ZIP64 record written above.
            endrec = struct.pack(structEndArchive, stringEndArchive,
                        0, 0, count, count, pos2 - pos1, -1, 0)
            self.fp.write(endrec)

        else:
            endrec = struct.pack(structEndArchive, stringEndArchive,
                     0, 0, count, count, pos2 - pos1, pos1, 0)
            self.fp.write(endrec)
        self.fp.flush()
    if not self._filePassed:
        self.fp.close()
    self.fp = None
1039
1040
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        'basename' is the archive path prefix the entries are stored
        under.  Raises RuntimeError when pathname is a file whose name
        does not end with ".py".
        """
        name = os.path.basename(pathname)
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc);
        the archive name is prefixed with "basename/" when basename is
        not empty.

        An up-to-date .pyo file is preferred; otherwise the .pyc file is
        used, and compiled first when it is missing or older than the
        .py source.  A compile error is printed, not raised.
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        elif not os.path.isfile(file_pyc) or \
                os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
            import py_compile
            if self.debug:
                print("Compiling", file_py)
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError as err:
                # Deliberately best-effort: report and carry on
                print(err.msg)
            fname = file_pyc
        else:
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001138
1139
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

    'args' is the argument list (default: sys.argv[1:]).  The first
    argument selects the action: -l, -t, -e or -c.  On a missing or
    unknown action, or a wrong number of arguments, the usage text is
    printed and the process exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        # testzip() returns the name of the first bad member; report it
        # instead of silently dropping the result
        if badfile:
            print("The following enclosed file is corrupted: %r" % badfile)
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        out = args[2]
        try:
            # NOTE(review): member names are joined to the target directory
            # unchecked, so an absolute name or one containing ".." may be
            # written outside 'out' -- confirm whether that is acceptable
            # for untrusted archives.
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                if not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                fp = io.open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; anything else
            # is ignored
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1212
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()