blob: d545c5566fa6a68bdbacc2b91fedbcefb6e89606 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Brett Cannonb57a0852013-06-15 17:32:30 -04009import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +000010import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
Serhiy Storchakaf15e5242015-01-26 13:53:38 +020016import threading
Barry Warsaw28a691b2010-04-17 00:19:56 +000017
Guido van Rossum32abe6f2000-03-31 17:30:02 +000018
19try:
Tim Peterse1190062001-01-15 03:34:38 +000020 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000021 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040022except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000023 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000024 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000025
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020026try:
27 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040028except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020029 bz2 = None
30
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020031try:
32 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040033except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020034 lzma = None
35
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020036__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020037 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Georg Brandl4d540882010-10-28 06:42:33 +000038 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000039
class BadZipFile(Exception):
    """Error raised for archives this module rejects as ZIP files."""


class LargeZipFile(Exception):
    """Error raised while writing when ZIP64 is needed but disabled.

    The archive being written requires the ZIP64 extensions, and the
    caller has turned those extensions off.
    """


# Pre-3.2 compatibility names: both are plain aliases of BadZipFile.
BadZipfile = BadZipFile
error = BadZipFile
51
Guido van Rossum32abe6f2000-03-31 17:30:02 +000052
# Size thresholds. Values above ZIP64_LIMIT do not fit the classic header
# fields and force the ZIP64 extra record when a file header is written.
ZIP64_LIMIT = 0x7FFFFFFF            # (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 0xFFFF        # (1 << 16) - 1
ZIP_MAX_COMMENT = 0xFFFF            # (1 << 16) - 1

# Compression method identifiers understood by this module.
# Any other ZIP compression method is not supported.
ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2, ZIP_LZMA = 0, 8, 12, 14

# "Version needed to extract" values written for each feature.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# We recognize (but do not necessarily support) every feature up to this
# version.
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020070
Martin v. Löwisb09b8442008-07-03 14:13:42 +000071# Below are some formats and associated data for reading/writing headers using
72# the struct module. The names and structures of headers/records are those used
73# in the PKWARE description of the ZIP file format:
74# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
75# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000076
# "End of central directory" record (section V.I in the format document):
# struct layout, magic number and packed size.
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields inside the unpacked record. The final two slots
# (comment and location) are not part of the on-disk structure; this module
# appends them to the unpacked list as a convenience.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE,
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)
95
# "Central directory" entry (section V.F in the format document):
# struct layout, magic number and packed size.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# Positions of the fields inside an unpacked central directory entry.
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)
122
# "Local file header" (section V.A in the format document):
# struct layout, magic number and packed size.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields inside an unpacked local file header.
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
141
# "Zip64 end of central directory locator": struct layout, magic number
# and packed size.
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# "Zip64 end of central directory" record (section V.G in the format
# document): struct layout, magic number and packed size.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields inside an unpacked ZIP64 end record.
(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
163
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An OSError raised while reading *fp* is treated as "not a ZIP file"
    and yields False.
    """
    try:
        endrec = _EndRecData(fp)
    except OSError:
        return False
    # A truthy record means the file has the correct magic number.
    return True if endrec else False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000171
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    *filename* may be a path, or any object with a ``read`` attribute, in
    which case it is inspected directly instead of being opened.  Any
    OSError (for example a missing file) results in False.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except OSError:
        # Unreadable or missing files simply are not ZIP files.
        pass
    return verdict
187
def _EndRecData64(fpin, offset, endrec):
    """Update *endrec* from the ZIP64 end-of-archive records, if present.

    *offset* is the (negative) position of the classic end record relative
    to the end of the file.  The ZIP64 locator is expected immediately
    before it, and the ZIP64 end record immediately before the locator.
    *endrec* is returned unchanged when either structure is missing, short
    or carries the wrong signature.

    Raises BadZipFile when the locator describes a multi-disk archive.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # The file is too small to hold a ZIP64 locator: keep the record
        # we were given.
        return endrec

    raw = fpin.read(sizeEndCentDir64Locator)
    if len(raw) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, reloff, disks = struct.unpack(structEndArchive64Locator,
                                                 raw)
    if magic != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks == 1):
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume there is no 'zip64 extensible data' between record and locator.
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    raw = fpin.read(sizeEndCentDir64)
    if len(raw) != sizeEndCentDir64:
        return endrec
    record = struct.unpack(structEndArchive64, raw)
    if record[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Overwrite the classic fields with their ZIP64 counterparts.
    for ecd_index, cd64_index in (
            (_ECD_SIGNATURE, _CD64_SIGNATURE),
            (_ECD_DISK_NUMBER, _CD64_DISK_NUMBER),
            (_ECD_DISK_START, _CD64_DISK_NUMBER_START),
            (_ECD_ENTRIES_THIS_DISK, _CD64_NUMBER_ENTRIES_THIS_DISK),
            (_ECD_ENTRIES_TOTAL, _CD64_NUMBER_ENTRIES_TOTAL),
            (_ECD_SIZE, _CD64_DIRECTORY_SIZE),
            (_ECD_OFFSET, _CD64_OFFSET_START_CENTDIR)):
        endrec[ecd_index] = record[cd64_index]
    return endrec
229
230
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list: the unpacked fields of the ZIP "End of central
    dir" record, then the archive comment, then the file offset at which
    the record starts.  The list is passed through _EndRecData64 so that
    ZIP64 values replace the classic ones when a ZIP64 record exists.
    """
    # Determine file size.
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: an archive without a comment ends with the record itself.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    no_comment = (len(tail) == sizeEndCentDir and
                  tail[0:4] == stringEndArchive and
                  tail[-2:] == b"\000\000")
    if no_comment:
        # Signature matches and the comment length is zero: unpack, then
        # append a blank comment and the record's start offset.
        endrec = list(struct.unpack(structEndArchive, tail))
        endrec += [b"", filesize - sizeEndCentDir]

        # Try to read the "Zip64 end of central directory" structure.
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it has an archive comment.  The
    # comment is the last item in the file and may be up to 64K long, so
    # search that much of the tail for the record signature.  It is assumed
    # that the magic number does not appear inside the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    window = fpin.read()
    start = window.rfind(stringEndArchive)
    if start < 0:
        # Unable to find a valid end of central directory structure.
        return None

    # Found the magic number; attempt to unpack and interpret.
    rec_end = start + sizeEndCentDir
    recData = window[start:rec_end]
    if len(recData) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, recData))
    commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    endrec.append(window[rec_end:rec_end + commentSize])
    record_pos = maxCommentStart + start
    endrec.append(record_pos)

    # Try to read the "Zip64 end of central directory" structure.
    return _EndRecData64(fpin, record_pos - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000290
Fred Drake484d7352000-10-02 21:14:52 +0000291
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename  -- member name; truncated at the first null byte and
                     normalized to use "/" as the directory separator.
        date_time -- (year, month, day, hour, min, sec) tuple.

        Raises ValueError if the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The high 16 bits of external_attr are rendered as a file mode, the
        # low 16 bits as a raw hexadecimal value.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.filename[-1:] == '/'
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file (local) header as bytes.

        zip64 -- None: add the ZIP64 extra record only when a size exceeds
                 ZIP64_LIMIT; true: always add it; false: never add it.

        Raises LargeZipFile when a size exceeds ZIP64_LIMIT and zip64 is
        false.  As a side effect, extract_version and create_version are
        raised to the minimum required by the features in use.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4,
                                        file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        The name is encoded as ASCII when possible; otherwise as UTF-8 with
        bit 0x800 added to the returned flag bits.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 record (type 1) found in self.extra, if any.

        Each of file_size, compress_size and header_offset that currently
        holds its 0xFFFFFFFF placeholder is replaced, in that order, by the
        next 64-bit value of the record.  Records of other types are
        skipped.

        Raises RuntimeError when the ZIP64 record has an unexpected length,
        is truncated, or holds fewer values than there are placeholders.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    nvalues = 3
                elif ln in (0, 8, 16):
                    nvalues = ln // 8
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                payload = extra[4:4 + 8 * nvalues]
                if len(payload) != 8 * nvalues:
                    # The record claims more data than the field contains.
                    raise RuntimeError("Corrupt extra field %s"%(ln,))
                counts = unpack('<%dQ' % nvalues, payload)

                idx = 0
                try:
                    # ZIP64 extension (large files and/or large archives)
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        self.file_size = counts[idx]
                        idx += 1

                    if self.compress_size == 0xFFFFFFFF:
                        self.compress_size = counts[idx]
                        idx += 1

                    if self.header_offset == 0xffffffff:
                        self.header_offset = counts[idx]
                        idx += 1
                except IndexError:
                    # More placeholders than values stored in the record.
                    raise RuntimeError(
                        "Corrupt extra field %s"%(ln,)) from None

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000468
469
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for byte in range(256):
            crc = byte
            for _ in range(8):
                low_bit = crc & 1
                crc = (crc >> 1) & 0x7FFFFFFF
                if low_bit:
                    crc ^= poly
            table.append(crc)
        return table
    # Built lazily by the first instance and shared through the class.
    crctable = None

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ch) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        """Initialise the three keys and mix in every byte of *pwd*."""
        if _ZipDecrypter.crctable is None:
            _ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable()
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for byte in pwd:
            self._UpdateKeys(byte)

    def _UpdateKeys(self, c):
        """Advance the key state with one plaintext byte *c*."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        mixed = (self.key1 + (self.key0 & 0xFF)) & mask32
        self.key1 = (mixed * 0x08088405 + 1) & mask32
        self.key2 = self._crc32((self.key1 >> 24) & 0xFF, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        keystream = ((k * (k ^ 1)) >> 8) & 0xFF
        plain = c ^ keystream
        self._UpdateKeys(plain)
        return plain
529
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200530
class LZMACompressor:
    """Raw LZMA1 compressor whose output starts with a properties header.

    The header is ``struct.pack('<BBH', 9, 4, len(props)) + props`` and is
    emitted once, in front of the first data returned by compress() or
    flush().
    """

    def __init__(self):
        # The underlying lzma compressor is created on first use.
        self._comp = None

    def _init(self):
        """Create the raw compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        header = struct.pack('<BBH', 9, 4, len(props))
        return header + props

    def compress(self, data):
        """Compress *data*, prefixing the header on the first call."""
        if self._comp is not None:
            return self._comp.compress(data)
        header = self._init()
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream, prefixing the header if nothing was written."""
        if self._comp is not None:
            return self._comp.flush()
        header = self._init()
        return header + self._comp.flush()
552
553
class LZMADecompressor:
    """Raw LZMA1 decompressor for data that starts with a properties header.

    Input is buffered until the 4-byte prefix, the properties it announces
    and at least one further byte are available; only then is the underlying
    lzma decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Return the decompressed bytes obtainable from *data*.

        Returns b'' while the header is still incomplete.
        """
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            (psize,) = struct.unpack('<H', pending[2:4])
            body_start = 4 + psize
            if len(pending) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            # Everything after the header is compressed payload.
            data = pending[body_start:]
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
580
581
# Display names for compression method numbers, used when describing a
# member or reporting an unsupported method.
compressor_names = {
    0: 'store', 1: 'shrink',
    2: 'reduce', 3: 'reduce', 4: 'reduce', 5: 'reduce',
    6: 'implode', 7: 'tokenize',
    8: 'deflate', 9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse', 19: 'lz77',
    97: 'wavpack', 98: 'ppmd',
}
601
def _check_compression(compression):
    """Validate that *compression* can be used in this interpreter.

    Returns None for ZIP_STORED and for any method whose support module was
    imported.  Raises RuntimeError when the required module is missing or
    the method is not one of the four known constants.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if zlib:
            return
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if compression == ZIP_BZIP2:
        if bz2:
            return
        raise RuntimeError(
            "Compression requires the (missing) bz2 module")
    if compression == ZIP_LZMA:
        if lzma:
            return
        raise RuntimeError(
            "Compression requires the (missing) lzma module")
    raise RuntimeError("That compression method is not supported")
619
620
def _get_compressor(compress_type):
    """Return a new compressor object for *compress_type*.

    Returns None for any type other than ZIP_DEFLATED, ZIP_BZIP2 and
    ZIP_LZMA.
    """
    if compress_type == ZIP_DEFLATED:
        # A negative window size is passed through to zlib unchanged.
        level = zlib.Z_DEFAULT_COMPRESSION
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    if compress_type == ZIP_LZMA:
        return LZMACompressor()
    return None
631
632
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    Returns None for ZIP_STORED.  Raises NotImplementedError for any method
    other than the four supported ones, naming the method when it is listed
    in compressor_names.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    descr = compressor_names.get(compress_type)
    if not descr:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200648
649
class _SharedFile:
    """Reader with a private position on a file object shared with others.

    Every read seeks the underlying file to this reader's own position
    while holding *lock*, then records the position reached.
    """

    def __init__(self, file, pos, close, lock):
        self._file = file
        self._pos = pos
        self._close = close     # callback invoked with the file on close()
        self._lock = lock

    def read(self, n=-1):
        """Read up to *n* bytes starting at this reader's position."""
        with self._lock:
            shared = self._file
            shared.seek(self._pos)
            chunk = shared.read(n)
            self._pos = shared.tell()
            return chunk

    def close(self):
        """Hand the file to the close callback; later calls do nothing."""
        fileobj, self._file = self._file, None
        if fileobj is not None:
            self._close(fileobj)
669
# Provide the tell method for unseekable stream
class _Tellable:
    """Write-through wrapper that counts written bytes to implement tell()."""

    def __init__(self, fp):
        self.fp = fp
        self.offset = 0     # running total of what fp.write() reported

    def write(self, data):
        """Write *data* to the wrapped stream and return its write count."""
        written = self.fp.write(data)
        self.offset += written
        return written

    def tell(self):
        """Return the total count accumulated by write() so far."""
        return self.offset

    def flush(self):
        """Flush the wrapped stream."""
        self.fp.flush()

    def close(self):
        """Close the wrapped stream."""
        self.fp.close()
689
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200690
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # The parentheses matter: '-' binds tighter than '<<', so the
    # unparenthesised spelling "1 << 31 - 1" evaluates to 1 << 30.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Set up reading of one member.

        fileobj       -- object whose read() yields the member's raw bytes
        mode          -- mode string; a 'U' in it enables universal newlines
        zipinfo       -- supplies compress_type, compress_size, file_size,
                         filename and (optionally) CRC
        decrypter     -- optional callable applied to every raw byte
        close_fileobj -- if true, close() also closes fileobj
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size   # raw bytes not yet read
        self._left = zipinfo.file_size                # output bytes not yet returned

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0        # read position inside _readbuffer

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            # No reference value: _update_crc() becomes a no-op.
            self._expected_crc = None

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                # Record each distinct newline form seen so far.
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                # Whatever form was found, the caller gets '\n'.
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # read() consumed the bytes; put them back in front of the
            # buffer (or simply rewind the offset when they still fit).
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        # Drain the buffer, then fetch the remaining n bytes.
        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than asked for: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            # Loop only until the first non-empty result.
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read raw (still compressed) bytes from the underlying file and
        # decrypt them if a decrypter was supplied.
        if self._compress_left <= 0:
            return b''

        # At least MIN_READ_SIZE, but never past the member's raw data.
        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            # Raw bytes were still expected but the file gave none.
            raise EOFError

        if self._decrypter is not None:
            data = bytes(map(self._decrypter, data))
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000956
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000957
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000958class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000959 """ Class with methods to open, read, write, close, list zip files.
960
Serhiy Storchaka235c5e02013-11-23 15:55:38 +0200961 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +0000962
Fred Drake3d9091e2001-03-26 15:49:24 +0000963 file: Either the path to the file, or a file-like object.
964 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +0200965 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
966 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200967 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
968 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000969 allowZip64: if True ZipFile will create files with ZIP64 extensions when
970 needed, otherwise it will raise an exception when this would
971 be necessary.
972
Fred Drake3d9091e2001-03-26 15:49:24 +0000973 """
Fred Drake484d7352000-10-02 21:14:52 +0000974
Fred Drake90eac282001-02-28 05:29:34 +0000975 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -0800976 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +0000977
    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'.

        file is either a str path, which is opened here, or an object that
        is used directly as the underlying file.  Raises RuntimeError for
        any other mode.  If setting up the archive fails, the underlying
        file is released through self._fpclose() and the exception is
        re-raised.
        """
        if mode not in ('r', 'w', 'x', 'a'):
            raise RuntimeError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = mode
        self.pwd = None
        self._comment = b''

        # Check if we were passed a file-like object
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            # Maps the archive mode to the first file mode to try, and each
            # file mode to the fallback tried when opening with it raises
            # OSError (e.g. 'a': 'r+b' -> 'w+b' -> 'wb').
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    if filemode in modeDict:
                        filemode = modeDict[filemode]
                        continue
                    # No fallback left for this file mode.
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    # No usable tell(): wrap the file so that positions are
                    # counted from the bytes written.
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
            else:
                # Not reachable after the mode check at the top.
                raise RuntimeError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            # Release the file on any failure, then re-raise unchanged.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001061
Ezio Melottifaa6b7f2009-12-30 12:34:59 +00001062 def __enter__(self):
1063 return self
1064
1065 def __exit__(self, type, value, traceback):
1066 self.close()
1067
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001068 def __repr__(self):
1069 result = ['<%s.%s' % (self.__class__.__module__,
1070 self.__class__.__qualname__)]
1071 if self.fp is not None:
1072 if self._filePassed:
1073 result.append(' file=%r' % self.fp)
1074 elif self.filename is not None:
1075 result.append(' filename=%r' % self.filename)
1076 result.append(' mode=%r' % self.mode)
1077 else:
1078 result.append(' [closed]')
1079 result.append('>')
1080 return ''.join(result)
1081
    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Locates the end-of-central-directory record, then parses every
        central directory entry into a ZipInfo that is appended to
        self.filelist and stored in self.NameToInfo.  Also sets
        self._comment and self.start_dir.

        Raises BadZipFile when the end record cannot be found or an entry
        is truncated or has the wrong signature, and NotImplementedError
        when an entry needs a newer extract version than
        MAX_EXTRACT_VERSION.
        """
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset", offset_cd, inferred, concat)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        # From here on parse the in-memory copy of the central directory.
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Index 5 is the field unpacked into x.flag_bits further down.
            flags = centdir[5]
            if flags & 0x800:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            # Shift the stored offset by the amount of prepended data.
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            if self.debug > 2:
                print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001158
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001159
1160 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001161 """Return a list of file names in the archive."""
Ezio Melotti006917e2012-04-16 21:34:24 -06001162 return [data.filename for data in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001163
1164 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001165 """Return a list of class ZipInfo instances for files in the
1166 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001167 return self.filelist
1168
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001169 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +00001170 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001171 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
1172 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001173 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001174 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001175 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1176 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001177
1178 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001179 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001180 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001181 for zinfo in self.filelist:
1182 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001183 # Read by chunks, to avoid an OverflowError or a
1184 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001185 with self.open(zinfo.filename, "r") as f:
1186 while f.read(chunk_size): # Check CRC-32
1187 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001188 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001189 return zinfo.filename
1190
1191 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +00001192 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001193 info = self.NameToInfo.get(name)
1194 if info is None:
1195 raise KeyError(
1196 'There is no item named %r in the archive' % name)
1197
1198 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001199
Thomas Wouterscf297e42007-02-23 15:07:44 +00001200 def setpassword(self, pwd):
1201 """Set default password for encrypted files."""
R. David Murray8d855d82010-12-21 21:53:37 +00001202 if pwd and not isinstance(pwd, bytes):
1203 raise TypeError("pwd: expected bytes, got %s" % type(pwd))
1204 if pwd:
1205 self.pwd = pwd
1206 else:
1207 self.pwd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001208
R David Murrayf50b38a2012-04-12 18:44:58 -04001209 @property
1210 def comment(self):
1211 """The comment text associated with the ZIP file."""
1212 return self._comment
1213
1214 @comment.setter
1215 def comment(self, comment):
1216 if not isinstance(comment, bytes):
1217 raise TypeError("comment: expected bytes, got %s" % type(comment))
1218 # check for valid comment length
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001219 if len(comment) > ZIP_MAX_COMMENT:
1220 import warnings
1221 warnings.warn('Archive comment is too long; truncating to %d bytes'
1222 % ZIP_MAX_COMMENT, stacklevel=2)
R David Murrayf50b38a2012-04-12 18:44:58 -04001223 comment = comment[:ZIP_MAX_COMMENT]
1224 self._comment = comment
1225 self._didModify = True
1226
Thomas Wouterscf297e42007-02-23 15:07:44 +00001227 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +00001228 """Return file bytes (as a string) for name."""
Benjamin Petersond285bdb2010-10-31 17:57:22 +00001229 with self.open(name, "r", pwd) as fp:
1230 return fp.read()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001231
    def open(self, name, mode="r", pwd=None):
        """Return file-like object for 'name'.

        name may be a member name or a ZipInfo instance.  mode must be
        "r", "U" or "rU"; any mode containing 'U' triggers a
        DeprecationWarning.  pwd, when given, must be bytes and is used for
        encrypted members, falling back to self.pwd.

        Raises RuntimeError for a bad mode, a closed archive, a missing
        password or a failed password check; TypeError for a non-bytes
        pwd; BadZipFile for a truncated or inconsistent local file header;
        NotImplementedError for flag bits 5 and 6.
        """
        if mode not in ("r", "U", "rU"):
            raise RuntimeError('open() requires mode "r", "U", or "rU"')
        if 'U' in mode:
            import warnings
            warnings.warn("'U' mode is deprecated",
                          DeprecationWarning, 2)
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd))
        if not self.fp:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        # The new reader shares self.fp; it is released via self._fpclose
        # when the reader is closed.
        self._fileRefCnt += 1
        zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose, self._lock)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                # Read past the extra field; its content is not used here.
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x20:
                # Zip 2.7: compressed patched data
                raise NotImplementedError("compressed patched data (flag bit 5)")

            if zinfo.flag_bits & 0x40:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            if zinfo.flag_bits & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            # The local header must name the same member as the directory.
            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            zd = None
            if is_encrypted:
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %s is encrypted, password "
                                       "required for extraction" % name)

                zd = _ZipDecrypter(pwd)
                # The first 12 bytes in the cypher stream is an encryption header
                #  used to strengthen the algorithm. The first 11 bytes are
                #  completely random, while the 12th contains the MSB of the CRC,
                #  or the MSB of the file time depending on the header type
                #  and is used to check the correctness of the password.
                header = zef_file.read(12)
                h = list(map(zd, header[0:12]))
                if zinfo.flag_bits & 0x8:
                    # compare against the file time from extended local headers
                    check_byte = (zinfo._raw_time >> 8) & 0xff
                else:
                    # compare against the CRC otherwise
                    check_byte = (zinfo.CRC >> 24) & 0xff
                if h[11] != check_byte:
                    raise RuntimeError("Bad password for file", name)

            return ZipExtFile(zef_file, mode, zinfo, zd, True)
        except:
            # Close the shared-file reader on any failure, then re-raise.
            zef_file.close()
            raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001319
Christian Heimes790c8232008-01-07 21:14:23 +00001320 def extract(self, member, path=None, pwd=None):
1321 """Extract a member from the archive to the current working directory,
1322 using its full name. Its file information is extracted as accurately
1323 as possible. `member' may be a filename or a ZipInfo object. You can
1324 specify a different directory using `path'.
1325 """
1326 if not isinstance(member, ZipInfo):
1327 member = self.getinfo(member)
1328
1329 if path is None:
1330 path = os.getcwd()
1331
1332 return self._extract_member(member, path, pwd)
1333
1334 def extractall(self, path=None, members=None, pwd=None):
1335 """Extract all members from the archive to the current working
1336 directory. `path' specifies a different directory to extract to.
1337 `members' is optional and must be a subset of the list returned
1338 by namelist().
1339 """
1340 if members is None:
1341 members = self.namelist()
1342
1343 for zipinfo in members:
1344 self.extract(zipinfo, path, pwd)
1345
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001346 @classmethod
1347 def _sanitize_windows_name(cls, arcname, pathsep):
1348 """Replace bad characters and remove trailing dots from parts."""
1349 table = cls._windows_illegal_name_trans_table
1350 if not table:
1351 illegal = ':<>|"?*'
1352 table = str.maketrans(illegal, '_' * len(illegal))
1353 cls._windows_illegal_name_trans_table = table
1354 arcname = arcname.translate(table)
1355 # remove trailing dots
1356 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1357 # rejoin, removing empty parts.
1358 arcname = pathsep.join(x for x in arcname if x)
1359 return arcname
1360
Christian Heimes790c8232008-01-07 21:14:23 +00001361 def _extract_member(self, member, targetpath, pwd):
1362 """Extract the ZipInfo object 'member' to a physical
1363 file on the path targetpath.
1364 """
1365 # build the destination pathname, replacing
1366 # forward slashes to platform specific separators.
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001367 arcname = member.filename.replace('/', os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001368
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001369 if os.path.altsep:
1370 arcname = arcname.replace(os.path.altsep, os.path.sep)
1371 # interpret absolute pathname as relative, remove drive letter or
1372 # UNC path, redundant separators, "." and ".." components.
1373 arcname = os.path.splitdrive(arcname)[1]
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001374 invalid_path_parts = ('', os.path.curdir, os.path.pardir)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001375 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001376 if x not in invalid_path_parts)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001377 if os.path.sep == '\\':
Serhiy Storchakae5e64442013-02-02 19:50:59 +02001378 # filter illegal characters on Windows
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001379 arcname = self._sanitize_windows_name(arcname, os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001380
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001381 targetpath = os.path.join(targetpath, arcname)
Christian Heimes790c8232008-01-07 21:14:23 +00001382 targetpath = os.path.normpath(targetpath)
1383
1384 # Create all upper directories if necessary.
1385 upperdirs = os.path.dirname(targetpath)
1386 if upperdirs and not os.path.exists(upperdirs):
1387 os.makedirs(upperdirs)
1388
Martin v. Löwis59e47792009-01-24 14:10:07 +00001389 if member.filename[-1] == '/':
Martin v. Löwis70ccd162009-05-24 19:47:22 +00001390 if not os.path.isdir(targetpath):
1391 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001392 return targetpath
1393
Antoine Pitrou17babc52012-11-17 23:50:08 +01001394 with self.open(member, pwd=pwd) as source, \
1395 open(targetpath, "wb") as target:
1396 shutil.copyfileobj(source, target)
Christian Heimes790c8232008-01-07 21:14:23 +00001397
1398 return targetpath
1399
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001400 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001401 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001402 if zinfo.filename in self.NameToInfo:
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001403 import warnings
1404 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001405 if self.mode not in ('w', 'x', 'a'):
1406 raise RuntimeError("write() requires mode 'w', 'x', or 'a'")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001407 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001408 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02001409 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001410 _check_compression(zinfo.compress_type)
Serhiy Storchakacfbb3942014-09-23 21:34:24 +03001411 if not self._allowZip64:
1412 requires_zip64 = None
1413 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1414 requires_zip64 = "Files count"
1415 elif zinfo.file_size > ZIP64_LIMIT:
1416 requires_zip64 = "Filesize"
1417 elif zinfo.header_offset > ZIP64_LIMIT:
1418 requires_zip64 = "Zipfile size"
1419 if requires_zip64:
1420 raise LargeZipFile(requires_zip64 +
1421 " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001422
1423 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +00001424 """Put the bytes from filename into the archive under the name
1425 arcname."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001426 if not self.fp:
1427 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02001428 "Attempt to write to ZIP archive that was already closed")
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001429
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001430 st = os.stat(filename)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001431 isdir = stat.S_ISDIR(st.st_mode)
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001432 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001433 date_time = mtime[0:6]
1434 # Create ZipInfo instance to store file information
1435 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001436 arcname = filename
1437 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1438 while arcname[0] in (os.sep, os.altsep):
1439 arcname = arcname[1:]
Martin v. Löwis59e47792009-01-24 14:10:07 +00001440 if isdir:
1441 arcname += '/'
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001442 zinfo = ZipInfo(arcname, date_time)
Guido van Rossume2a383d2007-01-15 16:59:06 +00001443 zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001444 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001445 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001446 else:
Tim Peterse1190062001-01-15 03:34:38 +00001447 zinfo.compress_type = compress_type
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001448
1449 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001450 zinfo.flag_bits = 0x00
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001451 with self._lock:
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001452 if self._seekable:
1453 self.fp.seek(self.start_dir)
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001454 zinfo.header_offset = self.fp.tell() # Start of header bytes
1455 if zinfo.compress_type == ZIP_LZMA:
1456 # Compressed data includes an end-of-stream (EOS) marker
1457 zinfo.flag_bits |= 0x02
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001458
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001459 self._writecheck(zinfo)
1460 self._didModify = True
Martin v. Löwis59e47792009-01-24 14:10:07 +00001461
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001462 if isdir:
1463 zinfo.file_size = 0
1464 zinfo.compress_size = 0
1465 zinfo.CRC = 0
1466 zinfo.external_attr |= 0x10 # MS-DOS directory flag
1467 self.filelist.append(zinfo)
1468 self.NameToInfo[zinfo.filename] = zinfo
1469 self.fp.write(zinfo.FileHeader(False))
1470 self.start_dir = self.fp.tell()
1471 return
1472
1473 cmpr = _get_compressor(zinfo.compress_type)
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001474 if not self._seekable:
1475 zinfo.flag_bits |= 0x08
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001476 with open(filename, "rb") as fp:
1477 # Must overwrite CRC and sizes with correct data later
1478 zinfo.CRC = CRC = 0
1479 zinfo.compress_size = compress_size = 0
1480 # Compressed size can be larger than uncompressed size
1481 zip64 = self._allowZip64 and \
1482 zinfo.file_size * 1.05 > ZIP64_LIMIT
1483 self.fp.write(zinfo.FileHeader(zip64))
1484 file_size = 0
1485 while 1:
1486 buf = fp.read(1024 * 8)
1487 if not buf:
1488 break
1489 file_size = file_size + len(buf)
1490 CRC = crc32(buf, CRC) & 0xffffffff
1491 if cmpr:
1492 buf = cmpr.compress(buf)
1493 compress_size = compress_size + len(buf)
1494 self.fp.write(buf)
1495 if cmpr:
1496 buf = cmpr.flush()
1497 compress_size = compress_size + len(buf)
1498 self.fp.write(buf)
1499 zinfo.compress_size = compress_size
1500 else:
1501 zinfo.compress_size = file_size
1502 zinfo.CRC = CRC
1503 zinfo.file_size = file_size
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001504 if zinfo.flag_bits & 0x08:
1505 # Write CRC and file sizes after the file data
1506 fmt = '<LQQ' if zip64 else '<LLL'
1507 self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
1508 zinfo.file_size))
1509 self.start_dir = self.fp.tell()
1510 else:
1511 if not zip64 and self._allowZip64:
1512 if file_size > ZIP64_LIMIT:
1513 raise RuntimeError('File size has increased during compressing')
1514 if compress_size > ZIP64_LIMIT:
1515 raise RuntimeError('Compressed size larger than uncompressed size')
1516 # Seek backwards and write file header (which will now include
1517 # correct CRC and file sizes)
1518 self.start_dir = self.fp.tell() # Preserve current position in file
1519 self.fp.seek(zinfo.header_offset)
1520 self.fp.write(zinfo.FileHeader(zip64))
1521 self.fp.seek(self.start_dir)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001522 self.filelist.append(zinfo)
1523 self.NameToInfo[zinfo.filename] = zinfo
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001524
Ronald Oussorenee5c8852010-02-07 20:24:02 +00001525 def writestr(self, zinfo_or_arcname, data, compress_type=None):
Guido van Rossum85825dc2007-08-27 17:03:28 +00001526 """Write a file into the archive. The contents is 'data', which
1527 may be either a 'str' or a 'bytes' instance; if it is a 'str',
1528 it is encoded as UTF-8 first.
1529 'zinfo_or_arcname' is either a ZipInfo instance or
Just van Rossumb083cb32002-12-12 12:23:32 +00001530 the name of the file in the archive."""
Guido van Rossum85825dc2007-08-27 17:03:28 +00001531 if isinstance(data, str):
1532 data = data.encode("utf-8")
Just van Rossumb083cb32002-12-12 12:23:32 +00001533 if not isinstance(zinfo_or_arcname, ZipInfo):
1534 zinfo = ZipInfo(filename=zinfo_or_arcname,
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001535 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001536 zinfo.compress_type = self.compression
Serhiy Storchaka46a34922014-09-23 22:40:23 +03001537 if zinfo.filename[-1] == '/':
1538 zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x
1539 zinfo.external_attr |= 0x10 # MS-DOS directory flag
1540 else:
1541 zinfo.external_attr = 0o600 << 16 # ?rw-------
Just van Rossumb083cb32002-12-12 12:23:32 +00001542 else:
1543 zinfo = zinfo_or_arcname
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001544
1545 if not self.fp:
1546 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02001547 "Attempt to write to ZIP archive that was already closed")
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001548
Guido van Rossum85825dc2007-08-27 17:03:28 +00001549 zinfo.file_size = len(data) # Uncompressed size
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001550 with self._lock:
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001551 if self._seekable:
Serhiy Storchakaa14f7d22015-01-26 14:01:27 +02001552 self.fp.seek(self.start_dir)
Serhiy Storchakaa14f7d22015-01-26 14:01:27 +02001553 zinfo.header_offset = self.fp.tell() # Start of header data
1554 if compress_type is not None:
1555 zinfo.compress_type = compress_type
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001556 zinfo.header_offset = self.fp.tell() # Start of header data
1557 if compress_type is not None:
1558 zinfo.compress_type = compress_type
1559 if zinfo.compress_type == ZIP_LZMA:
1560 # Compressed data includes an end-of-stream (EOS) marker
1561 zinfo.flag_bits |= 0x02
Ronald Oussorenee5c8852010-02-07 20:24:02 +00001562
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001563 self._writecheck(zinfo)
1564 self._didModify = True
1565 zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum
1566 co = _get_compressor(zinfo.compress_type)
1567 if co:
1568 data = co.compress(data) + co.flush()
1569 zinfo.compress_size = len(data) # Compressed size
1570 else:
1571 zinfo.compress_size = zinfo.file_size
1572 zip64 = zinfo.file_size > ZIP64_LIMIT or \
1573 zinfo.compress_size > ZIP64_LIMIT
1574 if zip64 and not self._allowZip64:
1575 raise LargeZipFile("Filesize would require ZIP64 extensions")
1576 self.fp.write(zinfo.FileHeader(zip64))
1577 self.fp.write(data)
1578 if zinfo.flag_bits & 0x08:
1579 # Write CRC and file sizes after the file data
1580 fmt = '<LQQ' if zip64 else '<LLL'
1581 self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
1582 zinfo.file_size))
1583 self.fp.flush()
1584 self.start_dir = self.fp.tell()
1585 self.filelist.append(zinfo)
1586 self.NameToInfo[zinfo.filename] = zinfo
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001587
1588 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001589 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001590 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001591
1592 def close(self):
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001593 """Close the file, and for mode 'w', 'x' and 'a' write the ending
Fred Drake484d7352000-10-02 21:14:52 +00001594 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001595 if self.fp is None:
1596 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001597
Antoine Pitrou17babc52012-11-17 23:50:08 +01001598 try:
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001599 if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001600 with self._lock:
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001601 if self._seekable:
Serhiy Storchakaa14f7d22015-01-26 14:01:27 +02001602 self.fp.seek(self.start_dir)
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001603 self._write_end_record()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001604 finally:
1605 fp = self.fp
1606 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001607 self._fpclose(fp)
1608
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001609 def _write_end_record(self):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001610 for zinfo in self.filelist: # write central directory
1611 dt = zinfo.date_time
1612 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1613 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1614 extra = []
1615 if zinfo.file_size > ZIP64_LIMIT \
1616 or zinfo.compress_size > ZIP64_LIMIT:
1617 extra.append(zinfo.file_size)
1618 extra.append(zinfo.compress_size)
1619 file_size = 0xffffffff
1620 compress_size = 0xffffffff
1621 else:
1622 file_size = zinfo.file_size
1623 compress_size = zinfo.compress_size
1624
1625 if zinfo.header_offset > ZIP64_LIMIT:
1626 extra.append(zinfo.header_offset)
1627 header_offset = 0xffffffff
1628 else:
1629 header_offset = zinfo.header_offset
1630
1631 extra_data = zinfo.extra
1632 min_version = 0
1633 if extra:
1634 # Append a ZIP64 field to the extra's
1635 extra_data = struct.pack(
1636 '<HH' + 'Q'*len(extra),
1637 1, 8*len(extra), *extra) + extra_data
1638
1639 min_version = ZIP64_VERSION
1640
1641 if zinfo.compress_type == ZIP_BZIP2:
1642 min_version = max(BZIP2_VERSION, min_version)
1643 elif zinfo.compress_type == ZIP_LZMA:
1644 min_version = max(LZMA_VERSION, min_version)
1645
1646 extract_version = max(min_version, zinfo.extract_version)
1647 create_version = max(min_version, zinfo.create_version)
1648 try:
1649 filename, flag_bits = zinfo._encodeFilenameFlags()
1650 centdir = struct.pack(structCentralDir,
1651 stringCentralDir, create_version,
1652 zinfo.create_system, extract_version, zinfo.reserved,
1653 flag_bits, zinfo.compress_type, dostime, dosdate,
1654 zinfo.CRC, compress_size, file_size,
1655 len(filename), len(extra_data), len(zinfo.comment),
1656 0, zinfo.internal_attr, zinfo.external_attr,
1657 header_offset)
1658 except DeprecationWarning:
1659 print((structCentralDir, stringCentralDir, create_version,
1660 zinfo.create_system, extract_version, zinfo.reserved,
1661 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1662 zinfo.CRC, compress_size, file_size,
1663 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1664 0, zinfo.internal_attr, zinfo.external_attr,
1665 header_offset), file=sys.stderr)
1666 raise
1667 self.fp.write(centdir)
1668 self.fp.write(filename)
1669 self.fp.write(extra_data)
1670 self.fp.write(zinfo.comment)
1671
1672 pos2 = self.fp.tell()
1673 # Write end-of-zip-archive record
1674 centDirCount = len(self.filelist)
1675 centDirSize = pos2 - self.start_dir
1676 centDirOffset = self.start_dir
1677 requires_zip64 = None
1678 if centDirCount > ZIP_FILECOUNT_LIMIT:
1679 requires_zip64 = "Files count"
1680 elif centDirOffset > ZIP64_LIMIT:
1681 requires_zip64 = "Central directory offset"
1682 elif centDirSize > ZIP64_LIMIT:
1683 requires_zip64 = "Central directory size"
1684 if requires_zip64:
1685 # Need to write the ZIP64 end-of-archive records
1686 if not self._allowZip64:
1687 raise LargeZipFile(requires_zip64 +
1688 " would require ZIP64 extensions")
1689 zip64endrec = struct.pack(
1690 structEndArchive64, stringEndArchive64,
1691 44, 45, 45, 0, 0, centDirCount, centDirCount,
1692 centDirSize, centDirOffset)
1693 self.fp.write(zip64endrec)
1694
1695 zip64locrec = struct.pack(
1696 structEndArchive64Locator,
1697 stringEndArchive64Locator, 0, pos2, 1)
1698 self.fp.write(zip64locrec)
1699 centDirCount = min(centDirCount, 0xFFFF)
1700 centDirSize = min(centDirSize, 0xFFFFFFFF)
1701 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1702
1703 endrec = struct.pack(structEndArchive, stringEndArchive,
1704 0, 0, centDirCount, centDirCount,
1705 centDirSize, centDirOffset, len(self._comment))
1706 self.fp.write(endrec)
1707 self.fp.write(self._comment)
1708 self.fp.flush()
1709
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001710 def _fpclose(self, fp):
1711 assert self._fileRefCnt > 0
1712 self._fileRefCnt -= 1
1713 if not self._fileRefCnt and not self._filePassed:
1714 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001715
1716
class PyZipFile(ZipFile):
    """ZipFile subclass for building archives of Python modules and
    packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile does and remember 'optimize',
        the optimization level used when picking or compiling modules."""
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add the Python code found at "pathname" to the ZIP archive.

        A package directory (one containing __init__.py) is added together
        with its *.py files and, recursively, its sub-packages.  For any
        other directory only the *.py files directly inside it are added.
        Otherwise pathname must name a *.py file, which is added on its
        own.  Modules are stored in compiled form when possible, being
        compiled first if necessary.
        If filterfunc is given it is called with each candidate path;
        a false result skips that file or directory.
        """
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s "%s" skipped by filterfunc' % (label, pathname))
            return
        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if ext != ".py":
                    continue
                if filterfunc and not filterfunc(path):
                    if self.debug:
                        print('file "%s" skipped by filterfunc' % path)
                    continue
                fname, arcname = self._get_codename(path[0:-3],
                                                    basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
            return

        # This is a package directory, add it
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)
        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in entries:
            path = os.path.join(pathname, filename)
            ext = os.path.splitext(filename)[1]
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # A sub-package: recurse into it
                    self.writepy(path, basename,
                                 filterfunc=filterfunc)
            elif ext == ".py":
                if filterfunc and not filterfunc(path):
                    if self.debug:
                        print('file "%s" skipped by filterfunc' % path)
                    continue
                fname, arcname = self._get_codename(path[0:-3],
                                                    basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        'pathname' is a module path without extension.  The result names
        the file to read from disk and the name to store it under in the
        archive, compiling the source first if necessary.  If compilation
        fails, the .py source itself is returned.
        """
        def _compile(file, optimize=-1):
            # Byte-compile 'file'; report errors and return success.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = importlib.util.cache_from_source(file_py, True)
        pycache_pyo = importlib.util.cache_from_source(file_py, False)

        def _is_current(candidate):
            # True if 'candidate' exists and is not older than the source.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present.  Candidates are
            # tried in order as (file on disk, name used in the archive);
            # __pycache__ files are archived under the legacy name.
            candidates = (
                (file_pyo, file_pyo),
                (file_pyc, file_pyc),
                (pycache_pyc, file_pyc),
                (pycache_pyo, file_pyo),
            )
            for fname, arcname in candidates:
                if _is_current(fname):
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    fname = (pycache_pyc if __debug__ else pycache_pyo)
                    arcname = (file_pyc if __debug__ else file_pyo)
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname, arcname = pycache_pyc, file_pyc
            else:
                fname, arcname = pycache_pyo, file_pyo
            if not _is_current(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001875
1876
def main(args = None):
    """Command-line entry point: list, test, extract or create a ZIP file.

    'args' defaults to sys.argv[1:].  On a usage error the usage text is
    printed and the process exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    def usage_exit():
        # Report incorrect usage and stop.
        print(USAGE)
        sys.exit(1)

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_exit()

    option = args[0]
    if option == '-l':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif option == '-t':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif option == '-e':
        if len(args) != 3:
            usage_exit()

        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif option == '-c':
        if len(args) < 3:
            usage_exit()

        def addToZip(zf, path, zippath):
            # Add a file, or a directory and everything below it.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                if zippath:
                    zf.write(path, zippath)
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # anything else is ignored

        with ZipFile(args[1], 'w') as zf:
            for path in args[2:]:
                zippath = os.path.basename(path)
                if not zippath:
                    # path ended with a separator; use the directory name
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                addToZip(zf, path, zippath)

if __name__ == "__main__":
    main()