blob: 93171358e41167b15686fbed5cb734aea5cbc5e6 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Brett Cannonb57a0852013-06-15 17:32:30 -04008import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import sys
10import time
11import stat
12import shutil
13import struct
14import binascii
15
Serhiy Storchaka9e777732015-10-10 19:43:32 +030016try:
17 import threading
18except ImportError:
19 import dummy_threading as threading
Guido van Rossum32abe6f2000-03-31 17:30:02 +000020
21try:
Tim Peterse1190062001-01-15 03:34:38 +000022 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000023 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040024except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000025 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000026 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000027
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020028try:
29 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040030except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020031 bz2 = None
32
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020033try:
34 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040035except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020036 lzma = None
37
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020038__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020039 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Georg Brandl4d540882010-10-28 06:42:33 +000040 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000041
class BadZipFile(Exception):
    """Raised for ZIP data that is corrupt or laid out in an unsupported way."""


class LargeZipFile(Exception):
    """Raised when a ZIP file being written needs the ZIP64 extensions
    but those extensions have been disabled.
    """


# Pre-3.2 compatibility names
BadZipfile = BadZipFile
error = BadZipFile
53
Guido van Rossum32abe6f2000-03-31 17:30:02 +000054
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +000055ZIP64_LIMIT = (1 << 31) - 1
Serhiy Storchakacfbb3942014-09-23 21:34:24 +030056ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +000057ZIP_MAX_COMMENT = (1 << 16) - 1
Thomas Wouters0e3f5912006-08-11 14:57:12 +000058
Guido van Rossum32abe6f2000-03-31 17:30:02 +000059# constants for Zip file compression methods
60ZIP_STORED = 0
61ZIP_DEFLATED = 8
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020062ZIP_BZIP2 = 12
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020063ZIP_LZMA = 14
Guido van Rossum32abe6f2000-03-31 17:30:02 +000064# Other ZIP compression methods not supported
65
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020066DEFAULT_VERSION = 20
67ZIP64_VERSION = 45
68BZIP2_VERSION = 46
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020069LZMA_VERSION = 63
Martin v. Löwisd099b562012-05-01 14:08:22 +020070# we recognize (but not necessarily support) all features up to that version
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020071MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020072
Martin v. Löwisb09b8442008-07-03 14:13:42 +000073# Below are some formats and associated data for reading/writing headers using
74# the struct module. The names and structures of headers/records are those used
75# in the PKWARE description of the ZIP file format:
76# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
77# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000078
Martin v. Löwisb09b8442008-07-03 14:13:42 +000079# The "end of central directory" structure, magic number, size, and indices
80# (section V.I in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +000081structEndArchive = b"<4s4H2LH"
82stringEndArchive = b"PK\005\006"
83sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +000084
85_ECD_SIGNATURE = 0
86_ECD_DISK_NUMBER = 1
87_ECD_DISK_START = 2
88_ECD_ENTRIES_THIS_DISK = 3
89_ECD_ENTRIES_TOTAL = 4
90_ECD_SIZE = 5
91_ECD_OFFSET = 6
92_ECD_COMMENT_SIZE = 7
93# These last two indices are not part of the structure as defined in the
94# spec, but they are used internally by this module as a convenience
95_ECD_COMMENT = 8
96_ECD_LOCATION = 9
97
98# The "central directory" structure, magic number, size, and indices
99# of entries in the structure (section V.F in the format document)
100structCentralDir = "<4s4B4HL2L5H2L"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000101stringCentralDir = b"PK\001\002"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000102sizeCentralDir = struct.calcsize(structCentralDir)
103
Fred Drake3e038e52001-02-28 17:56:26 +0000104# indexes of entries in the central directory structure
105_CD_SIGNATURE = 0
106_CD_CREATE_VERSION = 1
107_CD_CREATE_SYSTEM = 2
108_CD_EXTRACT_VERSION = 3
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000109_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +0000110_CD_FLAG_BITS = 5
111_CD_COMPRESS_TYPE = 6
112_CD_TIME = 7
113_CD_DATE = 8
114_CD_CRC = 9
115_CD_COMPRESSED_SIZE = 10
116_CD_UNCOMPRESSED_SIZE = 11
117_CD_FILENAME_LENGTH = 12
118_CD_EXTRA_FIELD_LENGTH = 13
119_CD_COMMENT_LENGTH = 14
120_CD_DISK_NUMBER_START = 15
121_CD_INTERNAL_FILE_ATTRIBUTES = 16
122_CD_EXTERNAL_FILE_ATTRIBUTES = 17
123_CD_LOCAL_HEADER_OFFSET = 18
124
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000125# The "local file header" structure, magic number, size, and indices
126# (section V.A in the format document)
127structFileHeader = "<4s2B4HL2L2H"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000128stringFileHeader = b"PK\003\004"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000129sizeFileHeader = struct.calcsize(structFileHeader)
130
Fred Drake3e038e52001-02-28 17:56:26 +0000131_FH_SIGNATURE = 0
132_FH_EXTRACT_VERSION = 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000133_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000134_FH_GENERAL_PURPOSE_FLAG_BITS = 3
135_FH_COMPRESSION_METHOD = 4
136_FH_LAST_MOD_TIME = 5
137_FH_LAST_MOD_DATE = 6
138_FH_CRC = 7
139_FH_COMPRESSED_SIZE = 8
140_FH_UNCOMPRESSED_SIZE = 9
141_FH_FILENAME_LENGTH = 10
142_FH_EXTRA_FIELD_LENGTH = 11
143
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000144# The "Zip64 end of central directory locator" structure, magic number, and size
Georg Brandl2ee470f2008-07-16 12:55:28 +0000145structEndArchive64Locator = "<4sLQL"
146stringEndArchive64Locator = b"PK\x06\x07"
147sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000148
149# The "Zip64 end of central directory" record, magic number, size, and indices
150# (section V.G in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000151structEndArchive64 = "<4sQ2H2L4Q"
152stringEndArchive64 = b"PK\x06\x06"
153sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000154
155_CD64_SIGNATURE = 0
156_CD64_DIRECTORY_RECSIZE = 1
157_CD64_CREATE_VERSION = 2
158_CD64_EXTRACT_VERSION = 3
159_CD64_DISK_NUMBER = 4
160_CD64_DISK_NUMBER_START = 5
161_CD64_NUMBER_ENTRIES_THIS_DISK = 6
162_CD64_NUMBER_ENTRIES_TOTAL = 7
163_CD64_DIRECTORY_SIZE = 8
164_CD64_OFFSET_START_CENTDIR = 9
165
def _check_zipfile(fp):
    """Return True when an end-of-central-directory record is found in *fp*.

    An OSError raised while probing the stream counts as "not a ZIP file"
    and is not propagated.
    """
    try:
        found = bool(_EndRecData(fp))  # truthy record => correct magic number
    except OSError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000173
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    *filename* is either something accepted by open() or an object that
    already has a ``read`` attribute, which is then probed directly.
    Any OSError raised along the way yields the result gathered so far
    (False unless the check itself had already completed).
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except OSError:
        pass
    return verdict
189
def _EndRecData64(fpin, offset, endrec):
    """Fold the ZIP64 end-of-archive records into *endrec* and return it.

    *offset* is handed to ``seek(..., 2)``, i.e. it is relative to the end
    of the file.  The ZIP64 locator is expected immediately before that
    position and the ZIP64 end record immediately before the locator.

    *endrec* is returned untouched when either structure cannot be read in
    full or carries the wrong signature.  Otherwise its signature, disk
    numbers, entry counts, directory size and directory offset are
    overwritten in place.

    Raises BadZipFile if the locator describes a multi-disk archive.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    raw = fpin.read(sizeEndCentDir64Locator)
    if len(raw) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, raw)
    if magic != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks == 1):
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    raw = fpin.read(sizeEndCentDir64)
    if len(raw) != sizeEndCentDir64:
        return endrec
    (magic, _record_size, _made_by, _needed, disk_num, disk_dir,
     entries_here, entries_total, dir_size, dir_offset) = \
        struct.unpack(structEndArchive64, raw)
    if magic != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    replacements = (
        (_ECD_SIGNATURE, magic),
        (_ECD_DISK_NUMBER, disk_num),
        (_ECD_DISK_START, disk_dir),
        (_ECD_ENTRIES_THIS_DISK, entries_here),
        (_ECD_ENTRIES_TOTAL, entries_total),
        (_ECD_SIZE, dir_size),
        (_ECD_OFFSET, dir_offset),
    )
    for slot, value in replacements:
        endrec[slot] = value
    return endrec
231
232
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list: the eight fields unpacked from the record, then
    the archive comment (bytes), then the file offset at which the record
    starts.  Before being returned the list is passed through
    _EndRecData64() so ZIP64 values can replace the classic ones.
    """
    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # First guess: no archive comment, so the record is the very last
    # thing in the file.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    comment_free = (len(tail) == sizeEndCentDir
                    and tail[0:4] == stringEndArchive
                    and tail[-2:] == b"\000\000")
    if comment_free:
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, tail))

        # Append a blank comment and record start offset
        endrec += [b"", filesize - sizeEndCentDir]

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    search_from = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(search_from, 0)
    tail = fpin.read()
    hit = tail.rfind(stringEndArchive)
    if hit < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    record_end = hit + sizeEndCentDir
    raw = tail[hit:record_end]
    if len(raw) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, raw))
    comment_len = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    endrec.append(tail[record_end:record_end + comment_len])
    endrec.append(search_from + hit)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, search_from + hit - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000292
Fred Drake484d7352000-10-02 21:14:52 +0000293
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename is normalised (cut at the first NUL, os.sep replaced by
        "/"); the untouched value is kept in orig_filename.  date_time is a
        (year, month, day, hour, min, sec) sequence.

        Raises ValueError when the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Return a compact description, omitting fields at default values."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file (local) header as bytes.

        zip64 forces (True) or forbids (False) the ZIP64 extra record; with
        None it is added only when a size exceeds ZIP64_LIMIT.  As a side
        effect extract_version and create_version are raised to the minimum
        the chosen features need.

        Raises LargeZipFile when a size exceeds ZIP64_LIMIT while zip64 is
        false.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        ASCII is tried first; names that do not fit are encoded as UTF-8 and
        bit 0x800 is added to the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 record(s) found in self.extra to this entry.

        Every field whose stored value is the all-ones placeholder
        (file_size, compress_size, header_offset, in that order) is replaced
        by the next 64-bit value of the record.

        Raises BadZipFile when the record has an unexpected length, is
        truncated, or holds fewer values than there are placeholders.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    fmt = '<QQQ'
                elif ln == 16:
                    fmt = '<QQ'
                elif ln == 8:
                    fmt = '<Q'
                elif ln == 0:
                    fmt = '<'
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
                try:
                    counts = unpack(fmt, extra[4:4 + struct.calcsize(fmt)])
                except struct.error:
                    # The declared length runs past the end of the data.
                    raise BadZipFile("Corrupt extra field %04x (size=%d)"
                                     % (tp, ln)) from None

                idx = 0
                try:
                    # ZIP64 extension (large files and/or large archives)
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        self.file_size = counts[idx]
                        idx += 1

                    if self.compress_size == 0xFFFFFFFF:
                        self.compress_size = counts[idx]
                        idx += 1

                    if self.header_offset == 0xffffffff:
                        self.header_offset = counts[idx]
                        idx += 1
                except IndexError:
                    # More placeholders than values supplied by the record.
                    raise BadZipFile("Corrupt zip64 extra field") from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        # endswith() rather than [-1] so an empty name is simply "not a
        # directory" instead of an IndexError.
        return self.filename.endswith('/')
506
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000507
Thomas Wouterscf297e42007-02-23 15:07:44 +0000508class _ZipDecrypter:
509 """Class to handle decryption of files stored within a ZIP archive.
510
511 ZIP supports a password-based form of encryption. Even though known
512 plaintext attacks have been found against it, it is still useful
Christian Heimesfdab48e2008-01-20 09:06:41 +0000513 to be able to get data out of such a file.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000514
515 Usage:
516 zd = _ZipDecrypter(mypwd)
517 plain_char = zd(cypher_char)
518 plain_text = map(zd, cypher_text)
519 """
520
521 def _GenerateCRCTable():
522 """Generate a CRC-32 table.
523
524 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
525 internal keys. We noticed that a direct implementation is faster than
526 relying on binascii.crc32().
527 """
528 poly = 0xedb88320
529 table = [0] * 256
530 for i in range(256):
531 crc = i
532 for j in range(8):
533 if crc & 1:
534 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
535 else:
536 crc = ((crc >> 1) & 0x7FFFFFFF)
537 table[i] = crc
538 return table
Daniel Holth9dee3042014-01-02 23:17:21 -0500539 crctable = None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000540
541 def _crc32(self, ch, crc):
542 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000543 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000544
545 def __init__(self, pwd):
Daniel Holth9dee3042014-01-02 23:17:21 -0500546 if _ZipDecrypter.crctable is None:
547 _ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable()
Thomas Wouterscf297e42007-02-23 15:07:44 +0000548 self.key0 = 305419896
549 self.key1 = 591751049
550 self.key2 = 878082192
551 for p in pwd:
552 self._UpdateKeys(p)
553
554 def _UpdateKeys(self, c):
555 self.key0 = self._crc32(c, self.key0)
556 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
557 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000558 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000559
560 def __call__(self, c):
561 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000562 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000563 k = self.key2 | 2
564 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000565 self._UpdateKeys(c)
566 return c
567
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200568
class LZMACompressor:
    """Raw LZMA1 compressor whose output starts with a small header.

    The underlying lzma compressor is created lazily; the first compress()
    or flush() call returns the header (bytes 9 and 4, the 16-bit length of
    the filter properties, then the properties) ahead of the payload.
    """

    def __init__(self):
        self._comp = None

    def _init(self):
        """Create the lzma compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Return compressed bytes for *data*, header first if needed."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream and return the remaining bytes."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.flush()
590
591
class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by a 4-byte header plus
    filter properties.

    Input is buffered until the header, the properties and at least one
    further byte are available; only then is the real lzma decompressor
    created.  ``eof`` mirrors the wrapped decompressor's flag.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Return decompressed bytes (b'' while the header is incomplete)."""
        if self._decomp is None:
            self._unconsumed += data
            buffered = self._unconsumed
            if len(buffered) <= 4:
                return b''
            psize, = struct.unpack('<H', buffered[2:4])
            body_start = 4 + psize
            if len(buffered) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, buffered[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = buffered[body_start:]
            # The staging buffer is not needed once the decoder exists.
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
618
619
620compressor_names = {
621 0: 'store',
622 1: 'shrink',
623 2: 'reduce',
624 3: 'reduce',
625 4: 'reduce',
626 5: 'reduce',
627 6: 'implode',
628 7: 'tokenize',
629 8: 'deflate',
630 9: 'deflate64',
631 10: 'implode',
632 12: 'bzip2',
633 14: 'lzma',
634 18: 'terse',
635 19: 'lz77',
636 97: 'wavpack',
637 98: 'ppmd',
638}
639
def _check_compression(compression):
    """Validate that *compression* names a usable compression method.

    Raises RuntimeError when the method is known but its supporting module
    could not be imported, and NotImplementedError for any other method
    apart from ZIP_STORED.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        backend = zlib
        complaint = "Compression requires the (missing) zlib module"
    elif compression == ZIP_BZIP2:
        backend = bz2
        complaint = "Compression requires the (missing) bz2 module"
    elif compression == ZIP_LZMA:
        backend = lzma
        complaint = "Compression requires the (missing) lzma module"
    else:
        raise NotImplementedError("That compression method is not supported")
    if not backend:
        raise RuntimeError(complaint)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200657
658
def _get_compressor(compress_type):
    """Return a new compressor object for *compress_type*.

    None is returned for every type other than ZIP_DEFLATED, ZIP_BZIP2 and
    ZIP_LZMA.
    """
    if compress_type == ZIP_DEFLATED:
        # Negative window size: raw deflate stream without a zlib header.
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    if compress_type == ZIP_LZMA:
        return LZMACompressor()
    return None
669
670
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    ZIP_STORED yields None.  Types other than ZIP_DEFLATED, ZIP_BZIP2 and
    ZIP_LZMA raise NotImplementedError; the message includes the method's
    name when compressor_names has one.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    descr = compressor_names.get(compress_type)
    if not descr:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200686
687
class _SharedFile:
    """Reader over a file object that is shared with other readers.

    Each instance remembers its own position and, while holding *lock*,
    seeks the shared file there before every read.  *writing* is a callable
    returning true while a write handle is open on the archive; reads are
    refused in that case.  *close* is called once with the file object when
    this reader is closed.
    """

    def __init__(self, file, pos, close, lock, writing):
        self._file, self._pos = file, pos
        self._close = close
        self._lock = lock
        self._writing = writing

    def read(self, n=-1):
        """Read up to *n* bytes starting at this reader's own position."""
        with self._lock:
            if self._writing():
                raise ValueError("Can't read from the ZIP file while there "
                        "is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            shared = self._file
            # Other readers may have moved the shared file; reposition first.
            shared.seek(self._pos)
            chunk = shared.read(n)
            self._pos = shared.tell()
            return chunk

    def close(self):
        """Release the shared file through the close callback (only once)."""
        if self._file is None:
            return
        fileobj, self._file = self._file, None
        self._close(fileobj)
712
# Adds a tell() method to a write-only stream that cannot report its position
class _Tellable:
    """Write-through wrapper that tracks how many bytes have been written.

    tell() reports the running total returned by the wrapped object's
    write() calls, starting from zero when the wrapper is created.
    """

    def __init__(self, fp):
        self.offset = 0
        self.fp = fp

    def write(self, data):
        """Write *data* to the wrapped stream and advance the offset."""
        written = self.fp.write(data)
        self.offset = self.offset + written
        return written

    def tell(self):
        """Return the number of bytes written through this wrapper."""
        return self.offset

    def flush(self):
        """Flush the wrapped stream."""
        self.fp.flush()

    def close(self):
        """Close the wrapped stream."""
        self.fp.close()
732
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200733
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.

    Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # Parenthesised on purpose: "-" binds tighter than "<<", so the
    # unparenthesised form "1 << 31 - 1" evaluates to 1 << 30.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Set up a reader for one archive member.

        fileobj is read for the member's raw (possibly compressed and
        encrypted) bytes.  zipinfo supplies compress_type, compress_size,
        file_size, filename and, if present, the expected CRC.  decrypter,
        when given, is applied to every raw byte.  If close_fileobj is
        true, fileobj is closed together with this object.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Put the bytes just read back in front of the buffer.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # The request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than asked for: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n=-1):
        """Read up to n bytes with at most one read() system call.

        If n is omitted, None, or negative, the buffered bytes plus the
        first non-empty chunk obtained from the member are returned.
        """

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the member's declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read raw bytes from the underlying file (at least MIN_READ_SIZE
        # requested, never more than the compressed bytes remaining) and
        # decrypt them if a decrypter is set.  Raises EOFError when the file
        # yields nothing although compressed bytes are still expected.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = bytes(map(self._decrypter, data))
        return data

    def close(self):
        """Close this reader, and the underlying file if it is owned."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000961
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000962
class _ZipWriteFile(io.BufferedIOBase):
    """Writable file-like object for adding one member to an archive.

    Data passed to write() is compressed (if the member's compress_type has
    a compressor) and written to the owning ZipFile's file object.  close()
    flushes the compressor, records CRC and sizes on the ZipInfo, writes
    them to the archive and registers the member with the ZipFile.
    """

    def __init__(self, zf, zinfo, zip64):
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type)
        self._file_size = 0
        self._compress_size = 0
        self._crc = 0

    @property
    def _fileobj(self):
        # Always look the file object up on the ZipFile rather than caching it.
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Compress and write *data*; return the number of input bytes.

        Raises ValueError if this handle has already been closed, because
        the member's sizes and CRC have been finalised by then.
        """
        if self.closed:
            raise ValueError('I/O operation on closed file.')
        nbytes = len(data)
        self._file_size += nbytes
        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        """Finish the member and register it with the archive.

        Calling close() again is a no-op, so the member is finalised and
        added to the ZipFile's caches exactly once.  The ZipFile's writing
        flag is cleared even if finalising fails.
        """
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                fmt = '<LQQ' if self._zip64 else '<LLL'
                self._fileobj.write(struct.pack(fmt, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError('File size unexpectedly exceeded ZIP64 '
                                           'limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError('Compressed size unexpectedly exceeded '
                                           'ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            # Release the archive for other readers/writers no matter what.
            self._zipfile._writing = False
1032
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001033class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001034 """ Class with methods to open, read, write, close, list zip files.
1035
Serhiy Storchaka235c5e02013-11-23 15:55:38 +02001036 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +00001037
Fred Drake3d9091e2001-03-26 15:49:24 +00001038 file: Either the path to the file, or a file-like object.
1039 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001040 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1041 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001042 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1043 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001044 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1045 needed, otherwise it will raise an exception when this would
1046 be necessary.
1047
Fred Drake3d9091e2001-03-26 15:49:24 +00001048 """
Fred Drake484d7352000-10-02 21:14:52 +00001049
Fred Drake90eac282001-02-28 05:29:34 +00001050 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001051 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001052
Serhiy Storchaka235c5e02013-11-23 15:55:38 +02001053 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001054 """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
1055 or append 'a'."""
1056 if mode not in ('r', 'w', 'x', 'a'):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001057 raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001058
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001059 _check_compression(compression)
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001060
1061 self._allowZip64 = allowZip64
1062 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +00001063 self.debug = 0 # Level of printing: 0 through 3
1064 self.NameToInfo = {} # Find file info given name
1065 self.filelist = [] # List of ZipInfo instances for archive
1066 self.compression = compression # Method of compression
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001067 self.mode = mode
Thomas Wouterscf297e42007-02-23 15:07:44 +00001068 self.pwd = None
R David Murrayf50b38a2012-04-12 18:44:58 -04001069 self._comment = b''
Tim Petersa19a1682001-03-29 04:36:09 +00001070
Fred Drake3d9091e2001-03-26 15:49:24 +00001071 # Check if we were passed a file-like object
Guido van Rossum3172c5d2007-10-16 18:12:55 +00001072 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001073 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +00001074 self._filePassed = 0
1075 self.filename = file
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001076 modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
1077 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001078 filemode = modeDict[mode]
1079 while True:
1080 try:
1081 self.fp = io.open(file, filemode)
1082 except OSError:
1083 if filemode in modeDict:
1084 filemode = modeDict[filemode]
1085 continue
Thomas Wouterscf297e42007-02-23 15:07:44 +00001086 raise
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001087 break
Fred Drake3d9091e2001-03-26 15:49:24 +00001088 else:
1089 self._filePassed = 1
1090 self.fp = file
1091 self.filename = getattr(file, 'name', None)
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001092 self._fileRefCnt = 1
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001093 self._lock = threading.RLock()
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001094 self._seekable = True
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001095 self._writing = False
Tim Petersa19a1682001-03-29 04:36:09 +00001096
Antoine Pitrou17babc52012-11-17 23:50:08 +01001097 try:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001098 if mode == 'r':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001099 self._RealGetContents()
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001100 elif mode in ('w', 'x'):
Georg Brandl268e4d42010-10-14 06:59:45 +00001101 # set the modified flag so central directory gets written
1102 # even if no files are added to the archive
1103 self._didModify = True
Serhiy Storchaka34cba332017-01-01 19:00:30 +02001104 self._start_disk = 0
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001105 try:
Serhiy Storchaka34cba332017-01-01 19:00:30 +02001106 self.start_dir = self.fp.tell()
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001107 except (AttributeError, OSError):
1108 self.fp = _Tellable(self.fp)
Serhiy Storchaka34cba332017-01-01 19:00:30 +02001109 self.start_dir = 0
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001110 self._seekable = False
1111 else:
1112 # Some file-like objects can provide tell() but not seek()
1113 try:
1114 self.fp.seek(self.start_dir)
1115 except (AttributeError, OSError):
1116 self._seekable = False
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001117 elif mode == 'a':
Antoine Pitrou17babc52012-11-17 23:50:08 +01001118 try:
1119 # See if file is a zip file
1120 self._RealGetContents()
1121 # seek to start of directory and overwrite
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001122 self.fp.seek(self.start_dir)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001123 except BadZipFile:
1124 # file is not a zip file, just append
1125 self.fp.seek(0, 2)
1126
1127 # set the modified flag so central directory gets written
1128 # even if no files are added to the archive
1129 self._didModify = True
Serhiy Storchaka8793b212016-10-07 22:20:50 +03001130 self.start_dir = self._start_disk = self.fp.tell()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001131 else:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001132 raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
Antoine Pitrou17babc52012-11-17 23:50:08 +01001133 except:
1134 fp = self.fp
1135 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001136 self._fpclose(fp)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001137 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001138
Ezio Melottifaa6b7f2009-12-30 12:34:59 +00001139 def __enter__(self):
1140 return self
1141
1142 def __exit__(self, type, value, traceback):
1143 self.close()
1144
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001145 def __repr__(self):
1146 result = ['<%s.%s' % (self.__class__.__module__,
1147 self.__class__.__qualname__)]
1148 if self.fp is not None:
1149 if self._filePassed:
1150 result.append(' file=%r' % self.fp)
1151 elif self.filename is not None:
1152 result.append(' filename=%r' % self.filename)
1153 result.append(' mode=%r' % self.mode)
1154 else:
1155 result.append(' [closed]')
1156 result.append('>')
1157 return ''.join(result)
1158
    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Locates the end-of-central-directory record with _EndRecData, reads
        the central directory, and appends one ZipInfo per entry to
        self.filelist and self.NameToInfo.  Also sets self._comment,
        self._start_disk and self.start_dir.

        Raises BadZipFile if no end record is found, if reading it raises
        OSError, or if a central directory entry is truncated or has a bad
        signature.  Raises NotImplementedError if an entry needs a newer
        extract version than MAX_EXTRACT_VERSION.
        """
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # self._start_disk:  Position of the start of ZIP archive
        # It is zero, unless ZIP was concatenated to another file
        self._start_disk = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            self._start_disk -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = self._start_disk + offset_cd
            print("given, inferred, offset", offset_cd, inferred, self._start_disk)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + self._start_disk
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        # From here on, parse the in-memory copy of the central directory.
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Field 5 is the same slot that is unpacked into x.flag_bits below.
            flags = centdir[5]
            if flags & 0x800:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            # Stored offsets are relative to the archive start; make absolute.
            x.header_offset = x.header_offset + self._start_disk
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            if self.debug > 2:
                print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001236
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001237
1238 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001239 """Return a list of file names in the archive."""
Ezio Melotti006917e2012-04-16 21:34:24 -06001240 return [data.filename for data in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001241
1242 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001243 """Return a list of class ZipInfo instances for files in the
1244 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001245 return self.filelist
1246
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001247 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +00001248 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001249 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
1250 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001251 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001252 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001253 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1254 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001255
1256 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001257 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001258 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001259 for zinfo in self.filelist:
1260 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001261 # Read by chunks, to avoid an OverflowError or a
1262 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001263 with self.open(zinfo.filename, "r") as f:
1264 while f.read(chunk_size): # Check CRC-32
1265 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001266 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001267 return zinfo.filename
1268
1269 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +00001270 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001271 info = self.NameToInfo.get(name)
1272 if info is None:
1273 raise KeyError(
1274 'There is no item named %r in the archive' % name)
1275
1276 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001277
Thomas Wouterscf297e42007-02-23 15:07:44 +00001278 def setpassword(self, pwd):
1279 """Set default password for encrypted files."""
R. David Murray8d855d82010-12-21 21:53:37 +00001280 if pwd and not isinstance(pwd, bytes):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001281 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
R. David Murray8d855d82010-12-21 21:53:37 +00001282 if pwd:
1283 self.pwd = pwd
1284 else:
1285 self.pwd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001286
R David Murrayf50b38a2012-04-12 18:44:58 -04001287 @property
1288 def comment(self):
1289 """The comment text associated with the ZIP file."""
1290 return self._comment
1291
1292 @comment.setter
1293 def comment(self, comment):
1294 if not isinstance(comment, bytes):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001295 raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
R David Murrayf50b38a2012-04-12 18:44:58 -04001296 # check for valid comment length
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001297 if len(comment) > ZIP_MAX_COMMENT:
1298 import warnings
1299 warnings.warn('Archive comment is too long; truncating to %d bytes'
1300 % ZIP_MAX_COMMENT, stacklevel=2)
R David Murrayf50b38a2012-04-12 18:44:58 -04001301 comment = comment[:ZIP_MAX_COMMENT]
1302 self._comment = comment
1303 self._didModify = True
1304
Thomas Wouterscf297e42007-02-23 15:07:44 +00001305 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +00001306 """Return file bytes (as a string) for name."""
Benjamin Petersond285bdb2010-10-31 17:57:22 +00001307 with self.open(name, "r", pwd) as fp:
1308 return fp.read()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001309
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Open an archive member and return a file-like object for it.

    name is either the member's file name inside the archive or a
    ZipInfo object describing it.

    mode is 'r' to read a member that is already in the archive, or 'w'
    to write a new member into the archive.

    pwd is the password used for decryption; it is accepted only when
    reading and must be bytes.

    force_zip64 matters only when writing: pass it when the final size
    is not known in advance but may exceed 2 GiB, so that the ZIP64
    format is used.  When the size is known up front, prefer passing a
    ZipInfo instance as name with zinfo.file_size already set.
    """
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    if pwd:
        if not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if mode == "w":
            raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    writing = mode == 'w'

    # Resolve 'name' to a ZipInfo record.
    if isinstance(name, ZipInfo):
        zinfo = name
    elif writing:
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
    else:
        zinfo = self.getinfo(name)

    if writing:
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                         "is an open writing handle on it. "
                         "Close the writing handle before trying to read.")

    # Reading: take another reference on the underlying file and wrap it
    # in a shared view positioned at this member's local header.
    self._fileRefCnt += 1
    shared_fp = _SharedFile(self.fp, zinfo.header_offset,
                            self._fpclose, self._lock, lambda: self._writing)
    try:
        # Consume and validate the fixed-size part of the local header.
        raw_header = shared_fp.read(sizeFileHeader)
        if len(raw_header) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        header_fields = struct.unpack(structFileHeader, raw_header)
        if header_fields[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        # Variable-size part: the file name, then the (ignored) extra field.
        raw_name = shared_fp.read(header_fields[_FH_FILENAME_LENGTH])
        extra_length = header_fields[_FH_EXTRA_FIELD_LENGTH]
        if extra_length:
            shared_fp.read(extra_length)

        flags = zinfo.flag_bits
        if flags & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")
        if flags & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        # Flag bit 0x800 marks a UTF-8 file name; otherwise cp437 is used.
        name_encoding = "utf-8" if flags & 0x800 else "cp437"
        decoded_name = raw_name.decode(name_encoding)
        if decoded_name != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, raw_name))

        # Flag bit 0x1 marks an encrypted member; set up the decrypter.
        decrypter = None
        if flags & 0x1:
            pwd = pwd or self.pwd
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)

            decrypter = _ZipDecrypter(pwd)
            # The cipher stream starts with a 12-byte encryption header.
            # Its last byte is compared below against a byte taken from
            # either the raw file time or the CRC, which serves as a
            # check that the password is correct.
            crypt_header = shared_fp.read(12)
            plain_header = [decrypter(byte) for byte in crypt_header[0:12]]
            if flags & 0x8:
                # compare against the file time
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if plain_header[11] != check_byte:
                raise RuntimeError("Bad password for file %r" % name)

        return ZipExtFile(shared_fp, mode, zinfo, decrypter, True)
    except BaseException:
        # Whatever went wrong, release our reference on the shared file
        # before letting the error propagate.
        shared_fp.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001422
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001423 def _open_to_write(self, zinfo, force_zip64=False):
1424 if force_zip64 and not self._allowZip64:
1425 raise ValueError(
1426 "force_zip64 is True, but allowZip64 was False when opening "
1427 "the ZIP file."
1428 )
1429 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001430 raise ValueError("Can't write to the ZIP file while there is "
1431 "another write handle open on it. "
1432 "Close the first handle before opening another.")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001433
1434 # Sizes and CRC are overwritten with correct data after processing the file
1435 if not hasattr(zinfo, 'file_size'):
1436 zinfo.file_size = 0
1437 zinfo.compress_size = 0
1438 zinfo.CRC = 0
1439
1440 zinfo.flag_bits = 0x00
1441 if zinfo.compress_type == ZIP_LZMA:
1442 # Compressed data includes an end-of-stream (EOS) marker
1443 zinfo.flag_bits |= 0x02
1444 if not self._seekable:
1445 zinfo.flag_bits |= 0x08
1446
1447 if not zinfo.external_attr:
1448 zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
1449
1450 # Compressed size can be larger than uncompressed size
1451 zip64 = self._allowZip64 and \
1452 (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1453
1454 if self._seekable:
1455 self.fp.seek(self.start_dir)
1456 zinfo.header_offset = self.fp.tell()
1457
1458 self._writecheck(zinfo)
1459 self._didModify = True
1460
1461 self.fp.write(zinfo.FileHeader(zip64))
1462
1463 self._writing = True
1464 return _ZipWriteFile(self, zinfo, zip64)
1465
def extract(self, member, path=None, pwd=None):
    """Extract one archive member to disk and return the path written.

    'member' is either a file name or a ZipInfo object; a name is
    resolved through getinfo().  The member is extracted under 'path',
    which defaults to the current working directory, keeping its full
    name.  'pwd' is forwarded for decrypting the member.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
1479
def extractall(self, path=None, members=None, pwd=None):
    """Extract several archive members by calling extract() on each.

    'path' names the directory to extract into and is passed through to
    extract() unchanged.  'members' is optional; when omitted, every
    name returned by namelist() is extracted, otherwise it must be a
    subset of that list.  'pwd' is forwarded to every extract() call.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
1491
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001492 @classmethod
1493 def _sanitize_windows_name(cls, arcname, pathsep):
1494 """Replace bad characters and remove trailing dots from parts."""
1495 table = cls._windows_illegal_name_trans_table
1496 if not table:
1497 illegal = ':<>|"?*'
1498 table = str.maketrans(illegal, '_' * len(illegal))
1499 cls._windows_illegal_name_trans_table = table
1500 arcname = arcname.translate(table)
1501 # remove trailing dots
1502 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1503 # rejoin, removing empty parts.
1504 arcname = pathsep.join(x for x in arcname if x)
1505 return arcname
1506
Christian Heimes790c8232008-01-07 21:14:23 +00001507 def _extract_member(self, member, targetpath, pwd):
1508 """Extract the ZipInfo object 'member' to a physical
1509 file on the path targetpath.
1510 """
1511 # build the destination pathname, replacing
1512 # forward slashes to platform specific separators.
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001513 arcname = member.filename.replace('/', os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001514
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001515 if os.path.altsep:
1516 arcname = arcname.replace(os.path.altsep, os.path.sep)
1517 # interpret absolute pathname as relative, remove drive letter or
1518 # UNC path, redundant separators, "." and ".." components.
1519 arcname = os.path.splitdrive(arcname)[1]
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001520 invalid_path_parts = ('', os.path.curdir, os.path.pardir)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001521 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001522 if x not in invalid_path_parts)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001523 if os.path.sep == '\\':
Serhiy Storchakae5e64442013-02-02 19:50:59 +02001524 # filter illegal characters on Windows
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001525 arcname = self._sanitize_windows_name(arcname, os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001526
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001527 targetpath = os.path.join(targetpath, arcname)
Christian Heimes790c8232008-01-07 21:14:23 +00001528 targetpath = os.path.normpath(targetpath)
1529
1530 # Create all upper directories if necessary.
1531 upperdirs = os.path.dirname(targetpath)
1532 if upperdirs and not os.path.exists(upperdirs):
1533 os.makedirs(upperdirs)
1534
Serhiy Storchaka503f9082016-02-08 00:02:25 +02001535 if member.is_dir():
Martin v. Löwis70ccd162009-05-24 19:47:22 +00001536 if not os.path.isdir(targetpath):
1537 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001538 return targetpath
1539
Antoine Pitrou17babc52012-11-17 23:50:08 +01001540 with self.open(member, pwd=pwd) as source, \
1541 open(targetpath, "wb") as target:
1542 shutil.copyfileobj(source, target)
Christian Heimes790c8232008-01-07 21:14:23 +00001543
1544 return targetpath
1545
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001546 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001547 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001548 if zinfo.filename in self.NameToInfo:
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001549 import warnings
1550 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001551 if self.mode not in ('w', 'x', 'a'):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001552 raise ValueError("write() requires mode 'w', 'x', or 'a'")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001553 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001554 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001555 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001556 _check_compression(zinfo.compress_type)
Serhiy Storchakacfbb3942014-09-23 21:34:24 +03001557 if not self._allowZip64:
1558 requires_zip64 = None
1559 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1560 requires_zip64 = "Files count"
1561 elif zinfo.file_size > ZIP64_LIMIT:
1562 requires_zip64 = "Filesize"
1563 elif zinfo.header_offset > ZIP64_LIMIT:
1564 requires_zip64 = "Zipfile size"
1565 if requires_zip64:
1566 raise LargeZipFile(requires_zip64 +
1567 " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001568
def write(self, filename, arcname=None, compress_type=None):
    """Add the file or directory 'filename' to the archive.

    The entry is stored under 'arcname' when given; the ZipInfo record
    is built by ZipInfo.from_file(filename, arcname).  For regular files
    'compress_type' overrides the archive's default compression.

    Raises ValueError if the archive is closed or a writing handle is
    still open.
    """
    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname)

    if not zinfo.is_dir():
        # Regular file: pick the compression and stream the contents
        # through a writing handle.
        zinfo.compress_type = (self.compression if compress_type is None
                               else compress_type)
        with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
            shutil.copyfileobj(src, dest, 1024*8)
        return

    # Directory entry: there is no data, only a header to record.
    zinfo.compress_size = 0
    zinfo.CRC = 0
    with self._lock:
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        self.start_dir = self.fp.tell()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001610
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write 'data' into the archive as a single member.

    'data' may be either a 'str' or a 'bytes' instance; a 'str' is
    encoded as UTF-8 first.

    'zinfo_or_arcname' is either a ZipInfo instance or the name of the
    member in the archive.  When a name is given, a ZipInfo is created
    with the current local time and the archive's default compression;
    a name ending in '/' gets directory attributes, any other name gets
    permissions ?rw-------.  A ZipInfo passed in by the caller is used
    as is, but its file_size (and compress_type, if one is given) is
    updated in place.

    'compress_type', when not None, overrides the compression of the
    entry.

    Raises ValueError if the archive is closed or a writing handle is
    still open.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        # endswith() instead of indexing [-1]: an empty name must not
        # blow up with IndexError while choosing the attributes.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    zinfo.file_size = len(data)            # Uncompressed size
    with self._lock:
        with self.open(zinfo, mode='w') as dest:
            dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001646
1647 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001648 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001649 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001650
def close(self):
    """Close the archive; a second call is a no-op.

    If the archive was opened in mode 'w', 'x' or 'a' and has been
    modified, the ending records are written first.  Raises ValueError
    while a writing handle is still open on the archive.
    """
    if self.fp is None:
        return

    if self._writing:
        raise ValueError("Can't close the ZIP file while there is "
                         "an open writing handle on it. "
                         "Close the writing handle before closing the zip.")

    try:
        needs_end_record = self.mode in ('w', 'x', 'a') and self._didModify
        if needs_end_record:
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        # Detach the file object even if writing the records failed, then
        # drop our reference to it.
        handle = self.fp
        self.fp = None
        self._fpclose(handle)
1672
def _write_end_record(self):
    """Write the central directory and the end-of-archive record(s).

    One central directory entry is written to self.fp for every ZipInfo
    in self.filelist, followed by the ZIP64 end-of-archive record and
    locator when they are required, and finally the regular
    end-of-archive record and the archive comment.  The file is flushed
    at the end.

    Raises LargeZipFile if ZIP64 end-of-archive records are required but
    self._allowZip64 is false.
    """
    for zinfo in self.filelist:         # write central directory
        # Pack (year, month, day) and (hour, minute, second // 2) into
        # the two integer date/time fields of the entry.
        dt = zinfo.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        # Values that exceed ZIP64_LIMIT are collected here and replaced
        # by the 0xffffffff marker in the fixed-size fields.
        extra = []
        if zinfo.file_size > ZIP64_LIMIT \
           or zinfo.compress_size > ZIP64_LIMIT:
            extra.append(zinfo.file_size)
            extra.append(zinfo.compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
        else:
            file_size = zinfo.file_size
            compress_size = zinfo.compress_size

        # Header offsets are stored relative to self._start_disk.
        header_offset = zinfo.header_offset - self._start_disk
        if header_offset > ZIP64_LIMIT:
            extra.append(header_offset)
            header_offset = 0xffffffff

        extra_data = zinfo.extra
        min_version = 0
        if extra:
            # Prepend a ZIP64 field (header id 1, followed by the
            # collected 64-bit values) to the entry's extra data.
            extra_data = struct.pack(
                '<HH' + 'Q'*len(extra),
                1, 8*len(extra), *extra) + extra_data

            min_version = ZIP64_VERSION

        # bzip2 and LZMA compression raise the minimum version as well.
        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
        except DeprecationWarning:
            # Debugging aid: dump the values that were being packed to
            # stderr, then let the warning-turned-error propagate.
            print((structCentralDir, stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset), file=sys.stderr)
            raise
        self.fp.write(centdir)
        self.fp.write(filename)
        self.fp.write(extra_data)
        self.fp.write(zinfo.comment)

    # Position just past the last central directory entry.
    pos2 = self.fp.tell()
    # Write end-of-zip-archive record
    centDirCount = len(self.filelist)
    centDirSize = pos2 - self.start_dir
    centDirOffset = self.start_dir - self._start_disk
    # Name of the first quantity (if any) that exceeds the classic limits.
    requires_zip64 = None
    if centDirCount > ZIP_FILECOUNT_LIMIT:
        requires_zip64 = "Files count"
    elif centDirOffset > ZIP64_LIMIT:
        requires_zip64 = "Central directory offset"
    elif centDirSize > ZIP64_LIMIT:
        requires_zip64 = "Central directory size"
    if requires_zip64:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
        zip64endrec = struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, centDirCount, centDirCount,
            centDirSize, centDirOffset)
        self.fp.write(zip64endrec)

        zip64locrec = struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, pos2, 1)
        self.fp.write(zip64locrec)
        # Clamp the values to what the fixed-size fields of the regular
        # end-of-archive record can hold.
        centDirCount = min(centDirCount, 0xFFFF)
        centDirSize = min(centDirSize, 0xFFFFFFFF)
        centDirOffset = min(centDirOffset, 0xFFFFFFFF)

    endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, centDirCount, centDirCount,
                         centDirSize, centDirOffset, len(self._comment))
    self.fp.write(endrec)
    self.fp.write(self._comment)
    self.fp.flush()
1772
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001773 def _fpclose(self, fp):
1774 assert self._fileRefCnt > 0
1775 self._fileRefCnt -= 1
1776 if not self._fileRefCnt and not self._filePassed:
1777 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001778
1779
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember the optimize level.

        'optimize' selects which compiled file writepy() stores: -1
        (legacy mode) uses whatever up-to-date compiled file is present
        and compiles one otherwise; 0, 1 or 2 select that optimization
        level explicitly.  Any other value makes writepy() raise
        ValueError.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        Directory contents are processed in sorted order so that the
        order of the archive members does not depend on the order in
        which the file system happens to list them.

        If filterfunc is given, it is called with the path of every
        file or directory before it is added.  When it returns a false
        value, the file or directory is skipped.

        Raises RuntimeError if pathname is a file whose name does not
        end with ".py".
        """
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s %r skipped by filterfunc' % (label, pathname))
            return
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                # Sorted for a reproducible member order; __init__ has
                # already been written above.
                dirlist = sorted(os.listdir(pathname))
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename,
                                         filterfunc=filterfunc)  # Recursive call
                    elif os.path.splitext(filename)[1] == ".py":
                        self._write_module(path, basename, filterfunc)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in sorted(os.listdir(pathname)):
                    if os.path.splitext(filename)[1] == ".py":
                        self._write_module(os.path.join(pathname, filename),
                                           basename, filterfunc)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _write_module(self, path, basename, filterfunc):
        """Add the single module file 'path' (a *.py file) under 'basename'.

        The module is skipped when filterfunc rejects its path.
        """
        if filterfunc and not filterfunc(path):
            if self.debug:
                print('file %r skipped by filterfunc' % path)
            return
        fname, arcname = self._get_codename(path[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).
        If compilation fails, the source file itself is returned and
        archived under its .py name.

        Raises ValueError if the optimize level given to the constructor
        is not -1, 0, 1 or 2.
        """
        def _compile(file, optimize=-1):
            # Byte-compile 'file'; report a compile error by printing its
            # message and returning False instead of raising.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)

        def _is_fresh(compiled):
            # True if 'compiled' exists and is at least as new as the source.
            return (os.path.isfile(compiled) and
                    os.stat(compiled).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present.  The legacy .pyc
            # next to the source is preferred, then the __pycache__ files
            # by increasing optimization level; all of them are stored
            # under the legacy .pyc name in the archive.
            for candidate in (file_pyc, pycache_opt0,
                              pycache_opt1, pycache_opt2):
                if _is_fresh(candidate):
                    fname = candidate
                    arcname = file_pyc
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    if sys.flags.optimize == 0:
                        fname = pycache_opt0
                    elif sys.flags.optimize == 1:
                        fname = pycache_opt1
                    else:
                        fname = pycache_opt2
                    arcname = file_pyc
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_opt0
            elif self._optimize == 1:
                fname = pycache_opt1
            elif self._optimize == 2:
                fname = pycache_opt2
            else:
                msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                raise ValueError(msg)
            arcname = file_pyc
            if not _is_fresh(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001951
1952
def main(args=None):
    """Command-line interface of the zipfile module.

    *args* is the list of command-line arguments to parse.  When it is
    None, argparse reads them from ``sys.argv[1:]``.

    The options are mutually exclusive; one action is performed:

    -l, --list <zipfile>
        Print a listing of the archive.
    -e, --extract <zipfile> <output_dir>
        Extract the whole archive into <output_dir>.
    -c, --create <name> [<file> ...]
        Create the archive <name> from the given files and directories.
        Directories are added recursively, in sorted name order.
    -t, --test <zipfile>
        Test if the archive is valid and report the first bad member.

    If no action is given, the usage message is emitted and the parser
    exits with status 2 (raising SystemExit).
    """
    import argparse

    description = 'A simple command line interface for zipfile module.'
    parser = argparse.ArgumentParser(description=description)
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    group.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    group.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    args = parser.parse_args(args)

    if args.test is not None:
        src = args.test
        with ZipFile(src, 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args.list is not None:
        src = args.list
        with ZipFile(src, 'r') as zf:
            zf.printdir()

    elif args.extract is not None:
        src, curdir = args.extract
        with ZipFile(src, 'r') as zf:
            zf.extractall(curdir)

    elif args.create is not None:
        # The first value is the archive to create, the rest are sources.
        # Unpack instead of pop(0) so the parsed namespace is not mutated.
        zip_name, *files = args.create

        def add_to_zip(zf, path, zippath):
            """Add *path* to *zf* under the archive name *zippath*."""
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                # An empty zippath means the directory maps to the archive
                # root, which has no entry of its own.
                if zippath:
                    zf.write(path, zippath)
                # os.listdir() returns names in arbitrary order; sort them
                # so that the member order of the archive is reproducible.
                for nm in sorted(os.listdir(path)):
                    add_to_zip(zf,
                               os.path.join(path, nm),
                               os.path.join(zippath, nm))
            # else: ignore anything that is neither a file nor a directory

        with ZipFile(zip_name, 'w') as zf:
            for path in files:
                zippath = os.path.basename(path)
                if not zippath:
                    # path ends with a separator: use the last real component.
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    # Store the contents of ".", ".." or a root directory
                    # directly at the top level of the archive.
                    zippath = ''
                add_to_zip(zf, path, zippath)

    else:
        parser.exit(2, parser.format_usage())
2015
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()