blob: 2d308c368d2a936e7c8aeac980b55c6b15970a8b [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Christian Heimes790c8232008-01-07 21:14:23 +00006import struct, os, time, sys, shutil
Guido van Rossum68937b42007-05-18 00:51:22 +00007import binascii, io
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
9try:
Tim Peterse1190062001-01-15 03:34:38 +000010 import zlib # We may need its compression method
Guido van Rossum9c673f32001-04-10 15:37:12 +000011except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000012 zlib = None
13
Skip Montanaro40fc1602001-03-01 04:27:19 +000014__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Thomas Wouters0e3f5912006-08-11 14:57:12 +000015 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000016
class BadZipfile(Exception):
    """Raised when a file cannot be read as a ZIP archive."""


class LargeZipFile(Exception):
    """Raised when writing a zipfile that requires ZIP64 extensions
    while those extensions are disabled.
    """


# The exception raised by this module
error = BadZipfile
Guido van Rossum32abe6f2000-03-31 17:30:02 +000028
# Largest value representable in a signed 32-bit field; sizes above this
# need the ZIP64 extension.
ZIP64_LIMIT = (1 << 31) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers (all
# little-endian), each followed by the magic number of its record.
#
# The CRC-32 field of the central directory and local file header records
# is declared unsigned ("L"): on Python 3 binascii.crc32()/zlib.crc32()
# return values in range(2**32), which a signed "l" field cannot pack.
structEndArchive = "<4s4H2lH"           # 8 items, end of archive, 22 bytes
stringEndArchive = b"PK\005\006"        # magic number for end of archive record
structCentralDir = "<4s4B4HLLL5HLl"     # 19 items, central directory, 46 bytes
stringCentralDir = b"PK\001\002"        # magic number for central directory
structFileHeader = "<4s2B4HLLL2H"       # 12 items, file header record, 30 bytes
stringFileHeader = b"PK\003\004"        # magic number for file header
structEndArchive64Locator = "<4slql"    # 4 items, locate Zip64 header, 20 bytes
stringEndArchive64Locator = b"PK\x06\x07"  # magic token for locator header
structEndArchive64 = "<4sqhhllqqqq"     # 10 items, end of archive (Zip64), 56 bytes
stringEndArchive64 = b"PK\x06\x06"      # magic token for Zip64 header
Thomas Wouters0e3f5912006-08-11 14:57:12 +000047
Guido van Rossum32abe6f2000-03-31 17:30:02 +000048
# indexes of entries in the central directory structure
# (unpacking from range() fails loudly if a name is added or dropped
# without adjusting the count)
(
    _CD_SIGNATURE,
    _CD_CREATE_VERSION,
    _CD_CREATE_SYSTEM,
    _CD_EXTRACT_VERSION,
    _CD_EXTRACT_SYSTEM,             # is this meaningful?
    _CD_FLAG_BITS,
    _CD_COMPRESS_TYPE,
    _CD_TIME,
    _CD_DATE,
    _CD_CRC,
    _CD_COMPRESSED_SIZE,
    _CD_UNCOMPRESSED_SIZE,
    _CD_FILENAME_LENGTH,
    _CD_EXTRA_FIELD_LENGTH,
    _CD_COMMENT_LENGTH,
    _CD_DISK_NUMBER_START,
    _CD_INTERNAL_FILE_ATTRIBUTES,
    _CD_EXTERNAL_FILE_ATTRIBUTES,
    _CD_LOCAL_HEADER_OFFSET,
) = range(19)

# indexes of entries in the local file header structure
(
    _FH_SIGNATURE,
    _FH_EXTRACT_VERSION,
    _FH_EXTRACT_SYSTEM,             # is this meaningful?
    _FH_GENERAL_PURPOSE_FLAG_BITS,
    _FH_COMPRESSION_METHOD,
    _FH_LAST_MOD_TIME,
    _FH_LAST_MOD_DATE,
    _FH_CRC,
    _FH_COMPRESSED_SIZE,
    _FH_UNCOMPRESSED_SIZE,
    _FH_FILENAME_LENGTH,
    _FH_EXTRA_FIELD_LENGTH,
) = range(12)
83
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Returns True when _EndRecData() finds an "End of Central Directory"
    record in the file named *filename*.  Returns False when no record is
    found, or when opening or reading the file raises IOError.
    """
    try:
        # The with-block releases the handle even if _EndRecData() raises.
        with io.open(filename, "rb") as fpin:
            endrec = _EndRecData(fpin)
    except IOError:
        return False
    return bool(endrec)         # truthy endrec: file has correct magic number
Guido van Rossum32abe6f2000-03-31 17:30:02 +000095
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    *offset* is the position of the regular end-of-archive record relative
    to the end of the file (whence=2).  The ZIP64 locator is expected
    immediately before it and the ZIP64 end-of-archive record immediately
    before the locator.  *endrec* is returned unchanged when either
    signature does not match; otherwise items 1..6 are overwritten with
    the values from the ZIP64 record.

    Raises BadZipfile for archives that span multiple disks.
    """
    locator_len = struct.calcsize(structEndArchive64Locator)
    fpin.seek(offset - locator_len, 2)
    locator = struct.unpack(structEndArchive64Locator, fpin.read(locator_len))
    magic, first_disk, _reloff, disk_total = locator
    if magic != stringEndArchive64Locator:
        return endrec

    if not (first_disk == 0 and disk_total == 1):
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    record_len = struct.calcsize(structEndArchive64)
    fpin.seek(offset - locator_len - record_len, 2)
    record = struct.unpack(structEndArchive64, fpin.read(record_len))
    if record[0] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record.
    # record[4:10] holds, in order: disk_num, disk_dir, dircount,
    # dircount2, dirsize, diroffset -- they replace endrec[1] .. endrec[6].
    for position, value in enumerate(record[4:10], 1):
        endrec[position] = value
    return endrec
128
129
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the eight items in the ZIP "End of central dir"
    record followed by a ninth item, the archive comment (bytes), and a
    tenth item, the file seek offset of this record.

    None is returned when no record is found, including when the file is
    shorter than the 22 bytes of a comment-less record.
    """
    fpin.seek(0, 2)
    filesize = fpin.tell()
    if filesize < 22:
        # Too short to hold an end-of-archive record at all.
        return None

    fpin.seek(filesize - 22, 0)         # Assume no archive comment.
    data = fpin.read()
    if data[0:4] == stringEndArchive and data[-2:] == b"\000\000":
        endrec = list(struct.unpack(structEndArchive, data))
        endrec.append(b"")              # Append the (empty) archive comment
        endrec.append(filesize - 22)    # Append the record start offset
        # endrec[-4] is item 6, read as the central directory offset by
        # ZipFile._RealGetContents; -1 / 0xffffffff triggers the ZIP64 lookup.
        if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
            return _EndRecData64(fpin, -22, endrec)
        return endrec

    # Search the last END_BLOCK bytes of the file for the record signature.
    # The comment is appended to the ZIP file and has a 16 bit length.
    # So the comment may be up to 64K long.  We limit the search for the
    # signature to a few Kbytes at the end of the file for efficiency.
    # also, the signature must not appear in the comment.
    END_BLOCK = min(filesize, 1024 * 4)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    # A signature with fewer than 22 bytes after it cannot be a full record.
    if start >= 0 and start + 22 <= len(data):
        endrec = list(struct.unpack(structEndArchive, data[start:start+22]))
        comment = data[start+22:]
        if endrec[7] == len(comment):   # Comment length checks out
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(filesize - END_BLOCK + start)
            if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
                return _EndRecData64(fpin, - END_BLOCK + start, endrec)
            return endrec
    return None                         # Error, return None
167
Fred Drake484d7352000-10-02 21:14:52 +0000168
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named *filename* with modification time
        *date_time*, a (year, month, day, hour, min, sec) tuple.

        The remaining attributes get standard values; header_offset, CRC,
        compress_size and file_size are left unset here.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as bytes.

        The result is the packed fixed-size header followed by the UTF-8
        encoded file name and the extra data.  When a size exceeds
        ZIP64_LIMIT a ZIP64 extra field is appended to the extra data and
        extract_version / create_version are raised to at least 45 on this
        instance.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<hhqq'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff # -1
            compress_size = 0xffffffff # -1
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        # The length field must count the bytes actually written, which
        # differs from len(self.filename) for non-ASCII names.
        filename = self.filename.encode("utf-8")
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, self.flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _decodeExtra(self):
        """Decode the extra field, applying any ZIP64 values it carries.

        Walks the (type, length, data) records in self.extra.  For a
        type-1 (ZIP64) record, file_size, compress_size and header_offset
        are replaced, in that order, when they hold the -1 / 0xFFFFFFFF
        placeholder.

        Raises RuntimeError when a ZIP64 record has an unsupported length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Stop when fewer than 4 bytes remain: that cannot be a record header.
        while len(extra) >= 4:
            # Unsigned fields: a signed length could be negative and keep
            # the slice at the bottom of the loop from ever advancing.
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<qqq', extra[4:28])
                elif ln == 16:
                    counts = unpack('<qq', extra[4:20])
                elif ln == 8:
                    counts = unpack('<q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size == -1 or self.file_size == 0xFFFFFFFF:
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == -1 or self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == -1 or self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000299
300
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    for low-level security.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        polynomial = 0xedb88320
        entries = []
        for byte in range(256):
            value = byte
            for _ in range(8):
                shifted = (value >> 1) & 0x7FFFFFFF
                value = shifted ^ polynomial if value & 1 else shifted
            entries.append(value)
        return entries
    # Built once, while the class body executes, and shared by all instances.
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ch) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        """Set the three keys to their start values and mix in every
        item of *pwd* (iterating it must yield ints)."""
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for item in pwd:
            self._UpdateKeys(item)

    def _UpdateKeys(self, c):
        """Advance the three keys using the plaintext byte *c*."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        self.key1 = (self.key1 + (self.key0 & 0xFF)) & mask32
        self.key1 = (self.key1 * 0x08088405 + 1) & mask32
        self.key2 = self._crc32((self.key1 >> 24) & 0xFF, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        keystream = ((k * (k ^ 1)) >> 8) & 0xFF
        plain = c ^ keystream
        # The keys advance on the decrypted byte, not the cipher byte.
        self._UpdateKeys(plain)
        return plain
358
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap *fileobj*, from which the member's stored bytes are read.

        *zipinfo* supplies compress_type, compress_size and filename.
        *decrypt*, when given, is called with each raw byte and must
        return the decrypted byte.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0         # bytes pulled from fileobj so far
        self.rawbuffer = b''        # read from fileobj, not yet processed
        self.readbuffer = b''       # processed data not yet returned by read()
        self.linebuffer = b''       # data held back by readline()
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = (b"\n", )
        self.lastdiscard = b''      # separator removed by the last readline()

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal newline handling in readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = (b"\n", )
        if self.univ_newlines:
            self.nlSeps = (b"\r\n", b"\r", b"\n")

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next line; raise StopIteration when none is left."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object closed (the underlying fileobj is not touched)."""
        self.closed = True

    def _checkfornewline(self):
        """Return (index, length) of the first separator found in
        linebuffer, trying nlSeps in order, or (-1, -1) if there is none."""
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[:1]) == (b'\r', b'\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.
        """
        if size < 0:
            size = sys.maxsize
        elif size == 0:
            return b''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            nl = min(nl, size)
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = b''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + b"\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to *size* bytes of member data as bytes.

        With size None everything buffered after this call's fetch is
        returned; with size 0 an empty bytes object is returned at once.
        """
        # act like file obj and return empty string if size is 0
        if size == 0:
            return b''

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None and size >= 0:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            data = self.fileobj.read(bytesToRead)
            self.bytes_read += len(data)
            # Any type error from a misbehaving fileobj propagates as-is.
            self.rawbuffer += data

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = b''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = bytes(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type == ZIP_DEFLATED:
                    newdata = self.dc.decompress(newdata)
                    self.rawbuffer = self.dc.unconsumed_tail
                    # NOTE(review): nothing in this class sets self.eof to
                    # True, so this flush only runs if a caller sets it --
                    # confirm whether that is intended.
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            data = self.readbuffer
            self.readbuffer = b''
        else:
            data = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000556
557
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000558class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000559 """ Class with methods to open, read, write, close, list zip files.
560
    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000562
Fred Drake3d9091e2001-03-26 15:49:24 +0000563 file: Either the path to the file, or a file-like object.
564 If it is a path, the file will be opened and closed by ZipFile.
565 mode: The mode can be either read "r", write "w" or append "a".
566 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000567 allowZip64: if True ZipFile will create files with ZIP64 extensions when
568 needed, otherwise it will raise an exception when this would
569 be necessary.
570
Fred Drake3d9091e2001-03-26 15:49:24 +0000571 """
Fred Drake484d7352000-10-02 21:14:52 +0000572
Fred Drake90eac282001-02-28 05:29:34 +0000573 fp = None # Set here since __del__ checks it
574
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000575 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000576 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000577 if mode not in ("r", "w", "a"):
578 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
579
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000580 if compression == ZIP_STORED:
581 pass
582 elif compression == ZIP_DEFLATED:
583 if not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +0000584 raise RuntimeError(
585 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000586 else:
Collin Winterce36ad82007-08-30 01:19:48 +0000587 raise RuntimeError("That compression method is not supported")
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000588
589 self._allowZip64 = allowZip64
590 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000591 self.debug = 0 # Level of printing: 0 through 3
592 self.NameToInfo = {} # Find file info given name
593 self.filelist = [] # List of ZipInfo instances for archive
594 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000595 self.mode = key = mode.replace('b', '')[0]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000596 self.pwd = None
Tim Petersa19a1682001-03-29 04:36:09 +0000597
Fred Drake3d9091e2001-03-26 15:49:24 +0000598 # Check if we were passed a file-like object
Guido van Rossum3172c5d2007-10-16 18:12:55 +0000599 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000600 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +0000601 self._filePassed = 0
602 self.filename = file
603 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Thomas Wouterscf297e42007-02-23 15:07:44 +0000604 try:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000605 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000606 except IOError:
607 if mode == 'a':
608 mode = key = 'w'
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000609 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000610 else:
611 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000612 else:
613 self._filePassed = 1
614 self.fp = file
615 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000616
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000617 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000618 self._GetContents()
619 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000620 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000621 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000622 try: # See if file is a zip file
623 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000624 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000625 self.fp.seek(self.start_dir, 0)
626 except BadZipfile: # file is not a zip file, just append
627 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000628 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000629 if not self._filePassed:
630 self.fp.close()
631 self.fp = None
Collin Winterce36ad82007-08-30 01:19:48 +0000632 raise RuntimeError('Mode must be "r", "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000633
634 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000635 """Read the directory, making sure we close the file if the format
636 is bad."""
637 try:
638 self._RealGetContents()
639 except BadZipfile:
640 if not self._filePassed:
641 self.fp.close()
642 self.fp = None
643 raise
644
645 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000646 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000647 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000648 endrec = _EndRecData(fp)
649 if not endrec:
Collin Winterce36ad82007-08-30 01:19:48 +0000650 raise BadZipfile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000651 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000652 print(endrec)
Tim Peterse1190062001-01-15 03:34:38 +0000653 size_cd = endrec[5] # bytes in central directory
654 offset_cd = endrec[6] # offset of central directory
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000655 self.comment = endrec[8] # archive comment
656 # endrec[9] is the offset of the "End of Central Dir" record
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000657 if endrec[9] > ZIP64_LIMIT:
658 x = endrec[9] - size_cd - 56 - 20
659 else:
660 x = endrec[9] - size_cd
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000661 # "concat" is zero, unless zip was concatenated to another file
662 concat = x - offset_cd
663 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000664 print("given, inferred, offset", offset_cd, x, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000665 # self.start_dir: Position of start of central directory
666 self.start_dir = offset_cd + concat
667 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000668 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000669 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000670 total = 0
671 while total < size_cd:
672 centdir = fp.read(46)
673 total = total + 46
674 if centdir[0:4] != stringCentralDir:
Collin Winterce36ad82007-08-30 01:19:48 +0000675 raise BadZipfile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000676 centdir = struct.unpack(structCentralDir, centdir)
677 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000678 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +0000679 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000680 # Create ZipInfo instance to store file information
Guido van Rossum98297ee2007-11-06 21:34:58 +0000681 x = ZipInfo(filename.decode("utf-8"))
Fred Drake3e038e52001-02-28 17:56:26 +0000682 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
683 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
684 total = (total + centdir[_CD_FILENAME_LENGTH]
685 + centdir[_CD_EXTRA_FIELD_LENGTH]
686 + centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000687 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000688 (x.create_version, x.create_system, x.extract_version, x.reserved,
689 x.flag_bits, x.compress_type, t, d,
690 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
691 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
692 # Convert date/time code to (year, month, day, hour, min, sec)
693 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000694 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000695
696 x._decodeExtra()
697 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000698 self.filelist.append(x)
699 self.NameToInfo[x.filename] = x
700 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000701 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000702
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000703
def namelist(self):
    """Return a list of file names in the archive.

    A new list is built on every call, holding the `filename` of each
    entry of self.filelist in the same order.
    """
    return [entry.filename for entry in self.filelist]
710
def infolist(self):
    """Return the list of ZipInfo instances for files in the archive.

    The object returned is self.filelist itself, not a copy.
    """
    entries = self.filelist
    return entries
715
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    A header row is printed first, then one row per entry of
    self.filelist showing its name, modification time and uncompressed
    size.  `file` is handed to print() unchanged.
    """
    # NOTE(review): the padding inside "Modified " may have been collapsed
    # in the view this was taken from -- confirm against upstream.
    header = ("File Name", "Modified ", "Size")
    print("%-46s %19s %12s" % header, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000724
725 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000726 """Read all the files and check the CRC."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000727 for zinfo in self.filelist:
728 try:
Tim Peterse1190062001-01-15 03:34:38 +0000729 self.read(zinfo.filename) # Check CRC-32
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000730 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000731 return zinfo.filename
732
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000733
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    The lookup goes through self.NameToInfo; KeyError is raised when no
    entry is registered under that name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found

    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000742
def setpassword(self, pwd):
    """Set default password for encrypted files.

    `pwd` must be a bytes instance; it is stored on self.pwd, which
    open() falls back to when no password is passed explicitly.

    Raises TypeError for any other type.  The check is an explicit raise
    rather than an assert so that it still runs under ``python -O``.
    """
    if not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd
747
def read(self, name, pwd=None):
    """Return file bytes for name.

    The member is opened through self.open() in mode "r" (with `pwd`
    passed along) and everything its read() returns is handed back.
    """
    member = self.open(name, "r", pwd)
    return member.read()
751
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    `mode` must be "r", "U" or "rU"; with "U" in the mode, universal
    newlines are switched on for the returned ZipExtFile.  `pwd` is the
    password for an encrypted member and falls back to self.pwd.

    Raises RuntimeError for an unsupported mode, a closed archive, a
    missing password or a bad password; KeyError (from getinfo) when the
    archive has no member called `name`; BadZipfile when the local file
    header is damaged or disagrees with the central directory.

    When the archive was opened by file name, a private file handle is
    opened for the member.  That handle is closed again if anything goes
    wrong before the ZipExtFile has been built.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Get info object for name.  Done before any file is opened so that
    # an unknown name cannot leave a stray handle behind.
    zinfo = self.getinfo(name)

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(30)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename.encode("utf-8"):
            raise BadZipfile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        zd = None
        if zinfo.flag_bits & 0x1:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  and is used to check the correctness of the password.
            enc_header = zef_file.read(12)
            h = list(map(zd, enc_header[0:12]))
            if h[11] != ((zinfo.CRC >> 24) & 255):
                raise RuntimeError("Bad password for file %s" % name)

        # build the ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
    except BaseException:
        # Never close a file object the caller handed to the constructor.
        if not self._filePassed:
            zef_file.close()
        raise
    return zef
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000819
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a ZipInfo object. You can
       specify a different directory using `path'.

       A filename is resolved with getinfo(); the result of
       _extract_member() is returned.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
833
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
       directory. `path' specifies a different directory to extract to.
       `members' is optional and must be a subset of the list returned
       by namelist().

       Each member is passed to extract() together with `path` and `pwd`.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
845
846 def _extract_member(self, member, targetpath, pwd):
847 """Extract the ZipInfo object 'member' to a physical
848 file on the path targetpath.
849 """
850 # build the destination pathname, replacing
851 # forward slashes to platform specific separators.
852 if targetpath[-1:] == "/":
853 targetpath = targetpath[:-1]
854
855 # don't include leading "/" from file name if present
856 if os.path.isabs(member.filename):
857 targetpath = os.path.join(targetpath, member.filename[1:])
858 else:
859 targetpath = os.path.join(targetpath, member.filename)
860
861 targetpath = os.path.normpath(targetpath)
862
863 # Create all upper directories if necessary.
864 upperdirs = os.path.dirname(targetpath)
865 if upperdirs and not os.path.exists(upperdirs):
866 os.makedirs(upperdirs)
867
868 source = self.open(member.filename, pwd=pwd)
869 target = open(targetpath, "wb")
870 shutil.copyfileobj(source, target)
871 source.close()
872 target.close()
873
874 return targetpath
875
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000876 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000877 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000878 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000879 if self.debug: # Warning for duplicate names
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000880 print("Duplicate name:", zinfo.filename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000881 if self.mode not in ("w", "a"):
Collin Winterce36ad82007-08-30 01:19:48 +0000882 raise RuntimeError('write() requires mode "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000883 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +0000884 raise RuntimeError(
885 "Attempt to write ZIP archive that was already closed")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000886 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +0000887 raise RuntimeError(
888 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000889 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
Collin Winterce36ad82007-08-30 01:19:48 +0000890 raise RuntimeError("That compression method is not supported")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000891 if zinfo.file_size > ZIP64_LIMIT:
892 if not self._allowZip64:
893 raise LargeZipFile("Filesize would require ZIP64 extensions")
894 if zinfo.header_offset > ZIP64_LIMIT:
895 if not self._allowZip64:
Collin Winterce36ad82007-08-30 01:19:48 +0000896 raise LargeZipFile(
897 "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000898
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    `arcname` defaults to `filename`; its drive prefix and leading path
    separators are removed.  `compress_type` defaults to
    self.compression.

    Raises RuntimeError if the archive is already closed, plus whatever
    _writecheck() raises for the new entry.

    The local header is first written with zero CRC and sizes; once the
    data has been streamed out, those three fields are patched in place.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    # The real size is needed for the ZIP64 check in _writecheck().
    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    # The with-block guarantees the source file is closed even if
    # reading, compressing or writing fails half way through.
    with io.open(filename, "rb") as fp:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = binascii.crc32(buf, CRC)
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)

    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size

    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    # The CRC is masked and packed unsigned: same bytes as a signed pack
    # of a negative CRC, but no overflow for an unsigned crc32() result.
    self.fp.write(struct.pack("<LLL", zinfo.CRC & 0xffffffff,
                              zinfo.compress_size, zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
967
def writestr(self, zinfo_or_arcname, data):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo is created with the current
    local time and self.compression.

    Raises RuntimeError if the archive is already closed, plus whatever
    _writecheck() raises for the entry.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        # Keep only (year, month, day, hour, minute, second): the rest of
        # the file formats and indexes date_time as a six-field tuple.
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)             # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header data
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = binascii.crc32(data)        # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
        data = co.compress(data) + co.flush()
        zinfo.compress_size = len(data)     # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data.  The CRC is
        # masked and packed unsigned: same bytes as a signed pack of a
        # negative CRC, but no overflow for an unsigned crc32() result.
        self.fp.write(struct.pack("<LLL", zinfo.CRC & 0xffffffff,
                                  zinfo.compress_size, zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1009
1010 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001011 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001012 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001013
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Calling close() on an archive that is already closed does nothing.
    For a modified archive in mode "w" or "a", the central directory and
    the end-of-archive record (with the ZIP64 records when the central
    directory starts beyond ZIP64_LIMIT) are appended first.

    Whether or not writing those records succeeds, self.fp is reset to
    None and the underlying file is closed unless the caller supplied
    the file object.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff #-1
                    compress_size = 0xffffffff #-1
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = -1  # struct "l" format:  32 one bits
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                            '<hh' + 'q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                # The length field must count the bytes actually written,
                # not the characters of the (possibly non-ASCII) name.
                filename = zinfo.filename.encode("utf-8")
                centdir = struct.pack(structCentralDir,
                    stringCentralDir, create_version,
                    zinfo.create_system, extract_version, zinfo.reserved,
                    zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                    zinfo.CRC, compress_size, file_size,
                    len(filename), len(extra_data), len(zinfo.comment),
                    0, zinfo.internal_attr, zinfo.external_attr,
                    header_offset)
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            if pos1 > ZIP64_LIMIT:
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)

                endrec = struct.pack(structEndArchive, stringEndArchive,
                            0, 0, count, count, pos2 - pos1, -1, 0)
                self.fp.write(endrec)

            else:
                endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, count, count, pos2 - pos1, pos1, 0)
                self.fp.write(endrec)
            self.fp.flush()
    finally:
        # Mark the archive closed first so a later close() (for instance
        # from __del__) does not attempt to write the records again.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1099
1100
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        Raises RuntimeError when pathname is a file that does not end
        in ".py".
        """
        name = os.path.split(pathname)[1]

        # Case 1: a single module file.
        if not os.path.isdir(pathname):
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")

        # Case 2: a plain directory, its modules go in at top level.
        if not os.path.isfile(initname):
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if ext == ".py":
                    fname, arcname = self._get_codename(path[0:-3],
                                                        basename)
                    if self.debug:
                        print("Adding", arcname)
                    self.write(fname, arcname)
            return

        # Case 3: a package directory, archived below its own name.
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)

        # Add all *.py files and package subdirectories
        remaining = os.listdir(pathname)
        remaining.remove("__init__.py")     # already written above
        for filename in remaining:
            path = os.path.join(pathname, filename)
            ext = os.path.splitext(filename)[1]
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif ext == ".py":
                fname, arcname = self._get_codename(path[0:-3],
                                                    basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).

        A .pyo file at least as new as the .py source is preferred;
        otherwise the .pyc is used and is recompiled first when it is
        missing or older than the source.  A compile error is printed,
        not raised.
        """
        source = pathname + ".py"
        compiled = pathname + ".pyc"
        optimized = pathname + ".pyo"

        fname = compiled
        if os.path.isfile(optimized) and \
                os.stat(optimized).st_mtime >= os.stat(source).st_mtime:
            fname = optimized               # Use .pyo file
        elif not os.path.isfile(compiled) or \
                os.stat(compiled).st_mtime < os.stat(source).st_mtime:
            import py_compile
            if self.debug:
                print("Compiling", source)
            try:
                py_compile.compile(source, compiled, None, True)
            except py_compile.PyCompileError as err:
                print(err.msg)

        archivename = os.path.basename(fname)
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001198
1199
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

    `args` is the argument list without the program name and defaults to
    sys.argv[1:].  The usage text is printed and the process exits with
    status 1 when the option is unknown or the argument count is wrong.

    Every archive opened here is closed again even when an operation
    fails, and `-t` names the corrupted member reported by testzip().
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % badfile)
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                # tgtdir is '' for a bare name with an empty target dir;
                # os.makedirs('') would fail.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with io.open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Files are added deflated; directories are walked recursively.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1272
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()