blob: e1d07f2a5237bb7f37c45126aaa990ac6465262e [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04006import binascii
7import functools
8import importlib.util
Antoine Pitroua32f9a22010-01-27 21:18:57 +00009import io
shireenraoa4e29912019-08-24 11:26:41 -040010import itertools
Barry Warsaw28a691b2010-04-17 00:19:56 +000011import os
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040012import posixpath
Barry Warsaw28a691b2010-04-17 00:19:56 +000013import shutil
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040014import stat
Barry Warsaw28a691b2010-04-17 00:19:56 +000015import struct
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040016import sys
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020017import threading
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040018import time
Guido van Rossum32abe6f2000-03-31 17:30:02 +000019
20try:
Tim Peterse1190062001-01-15 03:34:38 +000021 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000022 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040023except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000025 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000026
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020027try:
28 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040029except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020030 bz2 = None
31
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020032try:
33 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040034except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020035 lzma = None
36
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020037__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020038 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Georg Brandl4d540882010-10-28 06:42:33 +000039 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000040
class BadZipFile(Exception):
    """Raised for archives that are corrupt or use an unsupported layout."""
    pass
Thomas Wouters0e3f5912006-08-11 14:57:12 +000043
44
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that requires ZIP64 extensions
    while those extensions are disabled.
    """
50
error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names


# Sizes above ZIP64_LIMIT cannot be stored without the ZIP64 extension
# (see ZipInfo.FileHeader, which compares both sizes against it).
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "Version needed to extract" values written into headers; the method- and
# ZIP64-specific ones act as lower bounds in ZipInfo.FileHeader.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020071
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)
#
# Every format string starts with "<": little-endian, standard sizes, no
# padding.  The index constants that follow each format name the positions
# in the tuple returned by struct.unpack() for that format.

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

# NOTE(review): presumably the optional data-descriptor signature from the
# ZIP specification; no use of it is visible in this part of the file.
_DD_SIGNATURE = 0x08074b50
166
# Header of one extra-field record: 16-bit ID followed by 16-bit data length.
_EXTRA_FIELD_STRUCT = struct.Struct('<HH')

def _strip_extra(extra, xids):
    """Return *extra* with every extra field whose ID is in *xids* removed.

    *extra* is a bytes object holding a sequence of records, each made of a
    4-byte little-endian header (ID, data length) followed by that many data
    bytes.  *xids* is any container supporting ``in`` on integer IDs.

    If no record matches, the very same *extra* object is returned.
    Otherwise a new bytes object is built from the records that were kept,
    including everything that follows the last removed record (further
    records as well as trailing bytes too short to form a header).
    """
    unpack_from = _EXTRA_FIELD_STRUCT.unpack_from
    modified = False
    buffer = []
    # `start` marks the beginning of the pending run of kept bytes,
    # `i` the record currently being examined.
    start = i = 0
    while i + 4 <= len(extra):
        xid, xlen = unpack_from(extra, i)
        j = i + 4 + xlen
        if xid in xids:
            if i != start:
                buffer.append(extra[start : i])
            start = j
            modified = True
        i = j
    if not modified:
        return extra
    # Flush the run after the last removed record; without this the tail of
    # the extra data would be lost.  `start` may exceed len(extra) when the
    # removed record claims more data than is present, in which case there
    # is nothing left to keep.
    if start < len(extra):
        buffer.append(extra[start:])
    return b''.join(buffer)
187
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An OSError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        found = bool(_EndRecData(fp))   # truthy: file has correct magic number
    except OSError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000195
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    """
    outcome = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and make sure it gets closed again.
            with open(filename, "rb") as stream:
                outcome = _check_zipfile(stream)
        else:
            # Already a file-like object: probe it directly.
            outcome = _check_zipfile(fp=filename)
    except OSError:
        # Unreadable or missing file; keep whatever was determined so far.
        pass
    return outcome
211
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    *offset* is the position of the classic end-of-central-directory record
    relative to the end of the file (so it is negative).  *endrec* is
    returned unchanged whenever the ZIP64 locator or record is absent or
    truncated; BadZipFile is raised for multi-disk archives.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, _reloff, disks = struct.unpack(structEndArchive64Locator,
                                                  locator)
    if magic != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    fields = struct.unpack(structEndArchive64, record)
    if fields[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = fields[_CD64_SIGNATURE]
    endrec[_ECD_DISK_NUMBER] = fields[_CD64_DISK_NUMBER]
    endrec[_ECD_DISK_START] = fields[_CD64_DISK_NUMBER_START]
    endrec[_ECD_ENTRIES_THIS_DISK] = fields[_CD64_NUMBER_ENTRIES_THIS_DISK]
    endrec[_ECD_ENTRIES_TOTAL] = fields[_CD64_NUMBER_ENTRIES_TOTAL]
    endrec[_ECD_SIZE] = fields[_CD64_DIRECTORY_SIZE]
    endrec[_ECD_OFFSET] = fields[_CD64_OFFSET_START_CENTDIR]
    return endrec
253
254
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # First guess: an archive without a comment, in which case the
    # "end of central directory" structure is the very last thing in the file.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    plain_record = (len(tail) == sizeEndCentDir
                    and tail[0:4] == stringEndArchive
                    and tail[-2:] == b"\000\000")
    if plain_record:
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, tail))

        # Append a blank comment and record start offset
        endrec += [b"", filesize - sizeEndCentDir]

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    window = fpin.read()
    start = window.rfind(stringEndArchive)
    if start < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    recData = window[start:start+sizeEndCentDir]
    if len(recData) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, recData))
    comment_begin = start + sizeEndCentDir
    comment_end = comment_begin + endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    endrec.extend((window[comment_begin:comment_end], maxCommentStart + start))

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, maxCommentStart + start - filesize,
                         endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000314
Fred Drake484d7352000-10-02 21:14:52 +0000315
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named *filename* stamped with *date_time*.

        *date_time* is a (year, month, day, hour, minute, second) sequence.
        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        self.compress_size = 0          # Size of the compressed file
        self.file_size = 0              # Size of the uncompressed file
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file

    def __repr__(self):
        """Return a compact description listing only non-default details."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The upper 16 bits are shown as a file mode, the lower 16 as raw hex.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        *zip64* forces (True) or forbids (False) the ZIP64 extra record;
        None enables it only when a size exceeds ZIP64_LIMIT.  Raises
        LargeZipFile if a size exceeds the limit while *zip64* is false.
        As a side effect, extract_version and create_version are raised to
        the minimum required by the chosen features.
        """
        dt = self.date_time
        # Pack the timestamp into the 16-bit date and time header fields;
        # seconds are stored halved.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # Append a record with ID 1 carrying both sizes as 64-bit values.
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4,
                                        file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        ASCII is used when possible; otherwise the name is encoded as UTF-8
        and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Update size/offset attributes from the ZIP64 record in self.extra.

        Raises BadZipFile if a record claims more data than is present or
        if the ZIP64 record lacks a value that a header field marked as
        stored there.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                data = extra[4:ln+4]
                # ZIP64 extension (large files and/or large archives)
                # Values appear in a fixed order, each one present only when
                # the corresponding attribute holds the placeholder value.
                try:
                    if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF):
                        field = "File size"
                        self.file_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.compress_size == 0xFFFF_FFFF:
                        field = "Compress size"
                        self.compress_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.header_offset == 0xFFFF_FFFF:
                        field = "Header offset"
                        self.header_offset, = unpack('<Q', data[:8])
                except struct.error:
                    raise BadZipFile(f"Corrupt zip64 extra field. "
                                     f"{field} not found.") from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).

        When strict_timestamps is false, modification years before 1980 or
        after 2107 are clamped to the nearest supported timestamp.
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory.

        A member is a directory when its name ends with '/'.  An empty
        name is reported as not being a directory.
        """
        # endswith() rather than indexing [-1], so an empty filename does
        # not raise IndexError (which would also break __repr__).
        return self.filename.endswith('/')
529
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000530
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300531# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
532# internal keys. We noticed that a direct implementation is faster than
533# relying on binascii.crc32().
Thomas Wouterscf297e42007-02-23 15:07:44 +0000534
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300535_crctable = None
def _gen_crc(crc):
    """Return *crc* after eight shift/xor rounds with polynomial 0xEDB88320.

    Called with each value in range(256) to build the lookup table used by
    the decrypter.
    """
    for _ in range(8):
        carry = crc & 1
        crc >>= 1
        if carry:
            crc ^= 0xEDB88320
    return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000543
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300544# ZIP supports a password-based form of encryption. Even though known
545# plaintext attacks have been found against it, it is still useful
546# to be able to get data out of such a file.
547#
548# Usage:
549# zd = _ZipDecrypter(mypwd)
550# plain_bytes = zd(cypher_bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000551
def _ZipDecrypter(pwd):
    """Return a function that decrypts bytes using keys seeded from *pwd*.

    *pwd* is iterated byte by byte (integers).  The returned callable keeps
    its key state between calls, so consecutive chunks of one stream must be
    passed to it in order.
    """
    key0 = 305419896
    key1 = 591751049
    key2 = 878082192

    # Build the shared lookup table on first use only.
    global _crctable
    if _crctable is None:
        _crctable = list(map(_gen_crc, range(256)))
    table = _crctable

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ table[(crc ^ ch) & 0xFF]

    def update_keys(c):
        # Advance all three keys with one plaintext byte.
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        key1 = ((key1 + (key0 & 0xFF)) & 0xFFFFFFFF) * 134775813 + 1
        key1 &= 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    # Seed the keys with the password.
    for pwd_byte in pwd:
        update_keys(pwd_byte)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        for byte in data:
            temp = key2 | 2
            byte ^= ((temp * (temp ^ 1)) >> 8) & 0xFF
            update_keys(byte)
            plain.append(byte)
        return bytes(plain)

    return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000588
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200589
class LZMACompressor:
    """Raw LZMA1 compressor that prefixes its output with a properties header.

    The underlying lzma compressor is created lazily, on the first call to
    compress() or flush(); that call's output starts with the header.
    """

    def __init__(self):
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        # Header: two fixed bytes (9, 4), 16-bit properties length, properties.
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*; the first output is preceded by the header."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream, emitting the header first if nothing was written."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.flush()
611
612
class LZMADecompressor:
    """Decompressor for a raw LZMA1 stream preceded by a properties header.

    Input is buffered until the 4-byte header and the properties it announces
    are followed by at least one more byte; only then is the underlying lzma
    decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Feed *data* and return whatever could be decompressed so far."""
        if self._decomp is None:
            self._unconsumed += data
            buffered = self._unconsumed
            if len(buffered) <= 4:
                return b''
            # Bytes 2-3 of the header hold the length of the properties.
            psize, = struct.unpack('<H', buffered[2:4])
            body_start = 4 + psize
            if len(buffered) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, buffered[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = buffered[body_start:]
            # The buffer is no longer needed once the decompressor exists.
            del self._unconsumed

        output = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return output
639
640
# Human-readable names for ZIP compression method numbers, used when
# describing an entry (ZipInfo.__repr__) or reporting an unsupported method.
# The table also names methods this module cannot compress or decompress.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
660
def _check_compression(compression):
    """Validate that *compression* is a usable compression method.

    Raises RuntimeError when the method is known but the module implementing
    it could not be imported, and NotImplementedError for any other method.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if zlib:
            return
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if compression == ZIP_BZIP2:
        if bz2:
            return
        raise RuntimeError(
            "Compression requires the (missing) bz2 module")
    if compression == ZIP_LZMA:
        if lzma:
            return
        raise RuntimeError(
            "Compression requires the (missing) lzma module")
    raise NotImplementedError("That compression method is not supported")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200678
679
Bo Baylesce237c72018-01-29 23:54:07 -0600680def _get_compressor(compress_type, compresslevel=None):
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200681 if compress_type == ZIP_DEFLATED:
Bo Baylesce237c72018-01-29 23:54:07 -0600682 if compresslevel is not None:
683 return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
684 return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200685 elif compress_type == ZIP_BZIP2:
Bo Baylesce237c72018-01-29 23:54:07 -0600686 if compresslevel is not None:
687 return bz2.BZ2Compressor(compresslevel)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200688 return bz2.BZ2Compressor()
Bo Baylesce237c72018-01-29 23:54:07 -0600689 # compresslevel is ignored for ZIP_LZMA
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200690 elif compress_type == ZIP_LZMA:
691 return LZMACompressor()
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200692 else:
693 return None
694
695
696def _get_decompressor(compress_type):
Berker Peksag2f1b8572019-09-12 17:13:44 +0300697 _check_compression(compress_type)
Martin v. Löwisb3260f02012-05-01 08:38:01 +0200698 if compress_type == ZIP_STORED:
699 return None
700 elif compress_type == ZIP_DEFLATED:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200701 return zlib.decompressobj(-15)
702 elif compress_type == ZIP_BZIP2:
703 return bz2.BZ2Decompressor()
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200704 elif compress_type == ZIP_LZMA:
705 return LZMADecompressor()
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200706 else:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200707 descr = compressor_names.get(compress_type)
Martin v. Löwisb3260f02012-05-01 08:38:01 +0200708 if descr:
709 raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
710 else:
711 raise NotImplementedError("compression type %d" % (compress_type,))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200712
713
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200714class _SharedFile:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300715 def __init__(self, file, pos, close, lock, writing):
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200716 self._file = file
717 self._pos = pos
718 self._close = close
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200719 self._lock = lock
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300720 self._writing = writing
John Jolly066df4f2018-01-30 01:51:35 -0700721 self.seekable = file.seekable
722 self.tell = file.tell
723
724 def seek(self, offset, whence=0):
725 with self._lock:
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200726 if self._writing():
John Jolly066df4f2018-01-30 01:51:35 -0700727 raise ValueError("Can't reposition in the ZIP file while "
728 "there is an open writing handle on it. "
729 "Close the writing handle before trying to read.")
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200730 self._file.seek(offset, whence)
John Jolly066df4f2018-01-30 01:51:35 -0700731 self._pos = self._file.tell()
732 return self._pos
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200733
734 def read(self, n=-1):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200735 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300736 if self._writing():
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300737 raise ValueError("Can't read from the ZIP file while there "
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300738 "is an open writing handle on it. "
739 "Close the writing handle before trying to read.")
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200740 self._file.seek(self._pos)
741 data = self._file.read(n)
742 self._pos = self._file.tell()
743 return data
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200744
745 def close(self):
746 if self._file is not None:
747 fileobj = self._file
748 self._file = None
749 self._close(fileobj)
750
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200751# Provide the tell method for unseekable stream
752class _Tellable:
753 def __init__(self, fp):
754 self.fp = fp
755 self.offset = 0
756
757 def write(self, data):
758 n = self.fp.write(data)
759 self.offset += n
760 return n
761
762 def tell(self):
763 return self.offset
764
765 def flush(self):
766 self.fp.flush()
767
768 def close(self):
769 self.fp.close()
770
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200771
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().

       Data is read from the underlying file object in compressed form,
       optionally decrypted, decompressed, and served through an internal
       read buffer.  A running CRC-32 is checked against the expected value
       when the end of the member is reached.
    """

    # Max size supported by decompressor.
    # Parenthesised on purpose: '-' binds tighter than '<<', so the
    # unparenthesised form would evaluate to 1 << 30.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, pwd=None,
                 close_fileobj=False):
        """Set up reading of the member described by *zipinfo*.

        fileobj: file object positioned at the start of the member's
            (possibly encrypted) compressed data.
        mode: stored as the ``mode`` attribute.
        zipinfo: supplies compress_type, compress_size, file_size,
            filename and, when present, CRC.
        pwd: password bytes; when true the member is treated as encrypted.
        close_fileobj: whether close() also closes *fileobj*.

        Raises RuntimeError if the password check byte does not match.
        """
        self._fileobj = fileobj
        self._pwd = pwd
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size   # compressed bytes not yet read
        self._left = zipinfo.file_size                # uncompressed bytes not yet produced

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0        # index of the next unread byte in _readbuffer

        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        # Remember the starting state so seek() can rewind by restarting
        # decompression from the beginning of the member.
        self._seekable = False
        try:
            if fileobj.seekable():
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                # NOTE: _running_crc only exists when zipinfo has a CRC; if
                # it is missing, the AttributeError handler below leaves
                # this object non-seekable.
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass

        self._decrypter = None
        if pwd:
            if zipinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zipinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zipinfo.CRC >> 24) & 0xff
            h = self._init_decrypter()
            if h != check_byte:
                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)

    def _init_decrypter(self):
        """Create the decrypter, consume the 12-byte encryption header and
        return its last decrypted byte (the password check byte)."""
        self._decrypter = _ZipDecrypter(self._pwd)
        # The first 12 bytes in the cypher stream is an encryption header
        #  used to strengthen the algorithm. The first 11 bytes are
        #  completely random, while the 12th contains the MSB of the CRC,
        #  or the MSB of the file time depending on the header type
        #  and is used to check the correctness of the password.
        header = self._fileobj.read(12)
        self._compress_left -= 12
        return self._decrypter(header)[11]

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Push the bytes just read back in front of the buffer so the
            # position appears unchanged.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True; raise ValueError if the file is closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.

        Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("read from closed file.")
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Request fully satisfied from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        # Drain the buffer, then keep reading until n more bytes or EOF.
        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than needed: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        """Fold *newdata* into the running CRC; raise BadZipFile on a
        mismatch once EOF has been reached."""
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n=-1):
        """Read up to n bytes with at most one read() system call.

        If n is omitted, None, or negative, return the buffered bytes plus
        the first non-empty chunk that can be decoded (or nothing more at
        EOF).
        """

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            # Loop only until the first non-empty chunk: a decompressor may
            # yield nothing for a given piece of input.
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        """Return the next chunk of uncompressed data (possibly empty).

        Updates _eof, _left and the running CRC.
        """
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         (self._compress_left <= 0 and
                          not self._decompressor.unconsumed_tail))
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        """Read (and decrypt, if needed) a chunk of raw member data.

        Reads at least MIN_READ_SIZE bytes when that much compressed data
        remains.  Raises EOFError if the underlying file ends early.
        """
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        """Close this object and, if requested at construction, the
        underlying file object."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        """Return whether seek()/tell() are supported; raise ValueError if
        the file is closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return self._seekable

    def seek(self, offset, whence=0):
        """Move to a new position in the uncompressed data and return it.

        The target is clamped to [0, uncompressed size].  Moving backwards
        beyond the buffered data restarts decompression from the beginning
        of the member; moving forwards reads and discards data.

        Raises ValueError if the file is closed or *whence* is invalid, and
        io.UnsupportedOperation if the underlying stream is not seekable.
        """
        if self.closed:
            raise ValueError("seek on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                # Re-read the encryption header to reset the cipher state.
                self._init_decrypter()

        # Skip forward by reading and discarding, in bounded chunks.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        """Return the current position in the uncompressed data.

        Raises ValueError if the file is closed and io.UnsupportedOperation
        if the underlying stream is not seekable.
        """
        if self.closed:
            raise ValueError("tell on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        # Bytes produced so far, minus what is still unread in the buffer.
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos
1099
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001100
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001101class _ZipWriteFile(io.BufferedIOBase):
1102 def __init__(self, zf, zinfo, zip64):
1103 self._zinfo = zinfo
1104 self._zip64 = zip64
1105 self._zipfile = zf
Bo Baylesce237c72018-01-29 23:54:07 -06001106 self._compressor = _get_compressor(zinfo.compress_type,
1107 zinfo._compresslevel)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001108 self._file_size = 0
1109 self._compress_size = 0
1110 self._crc = 0
1111
1112 @property
1113 def _fileobj(self):
1114 return self._zipfile.fp
1115
1116 def writable(self):
1117 return True
1118
1119 def write(self, data):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001120 if self.closed:
1121 raise ValueError('I/O operation on closed file.')
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001122 nbytes = len(data)
1123 self._file_size += nbytes
1124 self._crc = crc32(data, self._crc)
1125 if self._compressor:
1126 data = self._compressor.compress(data)
1127 self._compress_size += len(data)
1128 self._fileobj.write(data)
1129 return nbytes
1130
1131 def close(self):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001132 if self.closed:
1133 return
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001134 try:
1135 super().close()
1136 # Flush any data from the compressor, and update header info
1137 if self._compressor:
1138 buf = self._compressor.flush()
1139 self._compress_size += len(buf)
1140 self._fileobj.write(buf)
1141 self._zinfo.compress_size = self._compress_size
1142 else:
1143 self._zinfo.compress_size = self._file_size
1144 self._zinfo.CRC = self._crc
1145 self._zinfo.file_size = self._file_size
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001146
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001147 # Write updated header info
1148 if self._zinfo.flag_bits & 0x08:
1149 # Write CRC and file sizes after the file data
1150 fmt = '<LLQQ' if self._zip64 else '<LLLL'
1151 self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
1152 self._zinfo.compress_size, self._zinfo.file_size))
1153 self._zipfile.start_dir = self._fileobj.tell()
1154 else:
1155 if not self._zip64:
1156 if self._file_size > ZIP64_LIMIT:
1157 raise RuntimeError(
1158 'File size unexpectedly exceeded ZIP64 limit')
1159 if self._compress_size > ZIP64_LIMIT:
1160 raise RuntimeError(
1161 'Compressed size unexpectedly exceeded ZIP64 limit')
1162 # Seek backwards and write file header (which will now include
1163 # correct CRC and file sizes)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001164
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001165 # Preserve current position in file
1166 self._zipfile.start_dir = self._fileobj.tell()
1167 self._fileobj.seek(self._zinfo.header_offset)
1168 self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1169 self._fileobj.seek(self._zipfile.start_dir)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001170
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001171 # Successfully written: Add file to our caches
1172 self._zipfile.filelist.append(self._zinfo)
1173 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1174 finally:
1175 self._zipfile._writing = False
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001176
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001177
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001178
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001179class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001180 """ Class with methods to open, read, write, close, list zip files.
1181
Bo Baylesce237c72018-01-29 23:54:07 -06001182 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1183 compresslevel=None)
Tim Petersa19a1682001-03-29 04:36:09 +00001184
Fred Drake3d9091e2001-03-26 15:49:24 +00001185 file: Either the path to the file, or a file-like object.
1186 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001187 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1188 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001189 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1190 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001191 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1192 needed, otherwise it will raise an exception when this would
1193 be necessary.
Bo Baylesce237c72018-01-29 23:54:07 -06001194 compresslevel: None (default for the given compression type) or an integer
1195 specifying the level to pass to the compressor.
1196 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1197 When using ZIP_DEFLATED integers 0 through 9 are accepted.
1198 When using ZIP_BZIP2 integers 1 through 9 are accepted.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001199
Fred Drake3d9091e2001-03-26 15:49:24 +00001200 """
Fred Drake484d7352000-10-02 21:14:52 +00001201
Fred Drake90eac282001-02-28 05:29:34 +00001202 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001203 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001204
    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None, *, strict_timestamps=True):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'.

        *file* may be a path (str or os.PathLike), in which case the file is
        opened here, or an already open file-like object.

        Raises ValueError for an invalid mode; any error raised while
        reading or positioning the archive is re-raised after the file has
        been released via _fpclose().
        """
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''
        self._strict_timestamps = strict_timestamps

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            # Maps the requested mode to the io.open() mode to try first,
            # and each io.open() mode to the fallback to try if it fails.
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    # Retry with the fallback mode, if there is one.
                    if filemode in modeDict:
                        filemode = modeDict[filemode]
                        continue
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    # No usable tell(): wrap the stream so written bytes are
                    # counted, and treat it as unseekable.
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            # Release the file on any failure, then re-raise unchanged.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001294
    def __enter__(self):
        """Enter the runtime context; returns this ZipFile itself."""
        return self
1297
    def __exit__(self, type, value, traceback):
        """Exit the runtime context by calling close().

        The exception arguments are not inspected and nothing is returned,
        so an exception raised inside the ``with`` block is not suppressed.
        """
        self.close()
1300
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001301 def __repr__(self):
1302 result = ['<%s.%s' % (self.__class__.__module__,
1303 self.__class__.__qualname__)]
1304 if self.fp is not None:
1305 if self._filePassed:
1306 result.append(' file=%r' % self.fp)
1307 elif self.filename is not None:
1308 result.append(' filename=%r' % self.filename)
1309 result.append(' mode=%r' % self.mode)
1310 else:
1311 result.append(' [closed]')
1312 result.append('>')
1313 return ''.join(result)
1314
Tim Peters7d3bad62001-04-04 18:56:49 +00001315 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +00001316 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001317 fp = self.fp
Georg Brandl268e4d42010-10-14 06:59:45 +00001318 try:
1319 endrec = _EndRecData(fp)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001320 except OSError:
Georg Brandl4d540882010-10-28 06:42:33 +00001321 raise BadZipFile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001322 if not endrec:
Georg Brandl4d540882010-10-28 06:42:33 +00001323 raise BadZipFile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001324 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001325 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001326 size_cd = endrec[_ECD_SIZE] # bytes in central directory
1327 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
R David Murrayf50b38a2012-04-12 18:44:58 -04001328 self._comment = endrec[_ECD_COMMENT] # archive comment
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001329
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001330 # "concat" is zero, unless zip was concatenated to another file
1331 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +00001332 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1333 # If Zip64 extension structures are present, account for them
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001334 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001335
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001336 if self.debug > 2:
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001337 inferred = concat + offset_cd
1338 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001339 # self.start_dir: Position of start of central directory
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001340 self.start_dir = offset_cd + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001341 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001342 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001343 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001344 total = 0
1345 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001346 centdir = fp.read(sizeCentralDir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001347 if len(centdir) != sizeCentralDir:
1348 raise BadZipFile("Truncated central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001349 centdir = struct.unpack(structCentralDir, centdir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001350 if centdir[_CD_SIGNATURE] != stringCentralDir:
1351 raise BadZipFile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001352 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001353 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +00001354 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001355 flags = centdir[5]
1356 if flags & 0x800:
1357 # UTF-8 file names extension
1358 filename = filename.decode('utf-8')
1359 else:
1360 # Historical ZIP filename encoding
1361 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001362 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001363 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +00001364 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1365 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001366 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001367 (x.create_version, x.create_system, x.extract_version, x.reserved,
Christian Tismer59202e52013-10-21 03:59:23 +02001368 x.flag_bits, x.compress_type, t, d,
1369 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
Martin v. Löwisd099b562012-05-01 14:08:22 +02001370 if x.extract_version > MAX_EXTRACT_VERSION:
1371 raise NotImplementedError("zip file version %.1f" %
1372 (x.extract_version / 10))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001373 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1374 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +00001375 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001376 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Christian Tismer59202e52013-10-21 03:59:23 +02001377 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001378
1379 x._decodeExtra()
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001380 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001381 self.filelist.append(x)
1382 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001383
1384 # update total bytes read from central directory
1385 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1386 + centdir[_CD_EXTRA_FIELD_LENGTH]
1387 + centdir[_CD_COMMENT_LENGTH])
1388
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001389 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001390 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001391
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001392
def namelist(self):
    """Return the file names of all archive members.

    The names are taken from the ``filename`` attribute of each entry in
    ``self.filelist``, in list order.
    """
    names = []
    for entry in self.filelist:
        names.append(entry.filename)
    return names
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001396
def infolist(self):
    """Return the list of ZipInfo instances describing the archive members.

    The list returned is ``self.filelist`` itself, not a copy.
    """
    members = self.filelist
    return members
1401
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header row is printed, followed by one row per entry of
    ``self.filelist`` showing its name, modification time and size.
    `file` is passed straight through to print().
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)

    stamp_template = "%d-%02d-%02d %02d:%02d:%02d"
    row_template = "%-46s %s %12d"
    for entry in self.filelist:
        modified = stamp_template % entry.date_time[:6]
        row = row_template % (entry.filename, modified, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001410
def testzip(self):
    """Read every member and return the name of the first bad one.

    Each entry of ``self.filelist`` is opened for reading and consumed to
    the end. If that raises BadZipFile, the entry's file name is returned
    immediately. Returns None when every member could be read.
    """
    block_size = 2 ** 20
    for entry in self.filelist:
        try:
            # Consume the member in fixed-size pieces rather than at once,
            # to avoid an OverflowError or a MemoryError with very large
            # embedded files.
            with self.open(entry.filename, "r") as member:
                while True:
                    if not member.read(block_size):  # Check CRC-32
                        break
        except BadZipFile:
            return entry.filename
    return None
1423
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError when ``self.NameToInfo`` has no entry for the name.
    """
    entry = self.NameToInfo.get(name)
    if entry is not None:
        return entry

    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001432
def setpassword(self, pwd):
    """Set the default password used for encrypted files.

    A truthy `pwd` must be a bytes object, otherwise TypeError is raised.
    Any falsy value (None, b"") clears the default by storing None.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001441
@property
def comment(self):
    """The comment bytes associated with the ZIP file."""
    current = self._comment
    return current

@comment.setter
def comment(self, comment):
    # Only bytes are accepted; anything else is rejected up front.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
    # Over-long comments are cut down to ZIP_MAX_COMMENT bytes, with a
    # warning attributed to the caller (stacklevel=2).
    too_long = len(comment) > ZIP_MAX_COMMENT
    if too_long:
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT, stacklevel=2)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    # Flag the archive as modified; close() checks this flag before
    # writing the ending records.
    self._didModify = True
1459
def read(self, name, pwd=None):
    """Return the complete contents of member 'name' as bytes.

    The member is opened in mode "r" with the given `pwd`, read to the
    end, and closed again before returning.
    """
    with self.open(name, "r", pwd) as member:
        payload = member.read()
    return payload
Guido van Rossumd8faa362007-04-27 19:54:29 +00001464
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Return a file-like object for the archive member 'name'.

    name is either the member's file name (a string) or a ZipInfo object.

    mode is "r" to read a member that is already in the archive, or "w"
    to write a member that is being newly added to it.

    pwd is the password used to decrypt the member; it must be bytes and
    is only accepted when reading. If it is not given, self.pwd is used
    for encrypted members.

    force_zip64 is forwarded to the writing path. Pass it when the size
    of the data is not known in advance but may exceed 2 GiB, so that the
    ZIP64 format is used. When the size is known in advance it is best to
    pass a ZipInfo instance for name, with zinfo.file_size set.

    Raises ValueError for a bad mode, for pwd combined with mode "w", for
    a closed archive, or when reading while a writing handle is open;
    TypeError for a non-bytes pwd; BadZipFile for an inconsistent local
    file header; NotImplementedError for unsupported flag bits; and
    RuntimeError when an encrypted member is read without a password.
    """
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    for_writing = (mode == "w")
    if pwd:
        if not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if for_writing:
            raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    # Resolve 'name' to an info object.
    if isinstance(name, ZipInfo):
        # The caller already handed us the info object.
        zinfo = name
    elif for_writing:
        # New member: start from the archive-wide compression settings.
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
    else:
        # Existing member: look it up by name.
        zinfo = self.getinfo(name)

    if for_writing:
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                         "is an open writing handle on it. "
                         "Close the writing handle before trying to read.")

    # Reading: take a reference on the underlying file object. It is
    # handed back through self._fpclose when the shared file is closed.
    self._fileRefCnt += 1
    shared = _SharedFile(self.fp, zinfo.header_offset,
                         self._fpclose, self._lock, lambda: self._writing)
    try:
        # Read and validate the fixed-size part of the local file header.
        raw_header = shared.read(sizeFileHeader)
        if len(raw_header) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        header = struct.unpack(structFileHeader, raw_header)
        if header[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        # The variable-length name follows; the extra field is read only
        # to move past it.
        raw_name = shared.read(header[_FH_FILENAME_LENGTH])
        extra_length = header[_FH_EXTRA_FIELD_LENGTH]
        if extra_length:
            shared.read(extra_length)

        flags = zinfo.flag_bits
        if flags & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if flags & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        # Flag bit 0x800 selects UTF-8 for the file name, otherwise cp437.
        name_encoding = "utf-8" if flags & 0x800 else "cp437"
        header_name = raw_name.decode(name_encoding)

        if header_name != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, raw_name))

        # Flag bit 0x1 marks an encrypted member, which needs a password:
        # the explicit one if given, else the archive default.
        if flags & 0x1:
            pwd = pwd or self.pwd
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)
        else:
            pwd = None

        return ZipExtFile(shared, mode, zinfo, pwd, True)
    except BaseException:
        # Close the shared file on any failure (including interrupts),
        # then let the exception continue.
        shared.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001562
def _open_to_write(self, zinfo, force_zip64=False):
    """Write the local file header for 'zinfo' and return a write handle.

    Resets the size, CRC and flag fields of `zinfo`, records the header
    offset, writes the file header to self.fp and marks the archive as
    having an open writing handle. Returns a _ZipWriteFile for the member.

    Raises ValueError if force_zip64 is requested on an archive opened
    without ZIP64 support, or if another writing handle is already open.
    """
    if force_zip64 and not self._allowZip64:
        raise ValueError(
            "force_zip64 is True, but allowZip64 was False when opening "
            "the ZIP file."
        )
    if self._writing:
        raise ValueError("Can't write to the ZIP file while there is "
                         "another write handle open on it. "
                         "Close the first handle before opening another.")

    # Size and CRC are overwritten with correct data after processing the file
    zinfo.compress_size = 0
    zinfo.CRC = 0

    flags = 0x00
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        flags |= 0x02
    if not self._seekable:
        flags |= 0x08
    zinfo.flag_bits = flags

    if not zinfo.external_attr:
        zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

    # Compressed size can be larger than uncompressed size
    zip64 = self._allowZip64 and (
        force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

    # The new member goes where the central directory currently starts.
    if self._seekable:
        self.fp.seek(self.start_dir)
    zinfo.header_offset = self.fp.tell()

    self._writecheck(zinfo)
    self._didModify = True

    self.fp.write(zinfo.FileHeader(zip64))

    self._writing = True
    return _ZipWriteFile(self, zinfo, zip64)
1603
def extract(self, member, path=None, pwd=None):
    """Extract a single member from the archive and return its path.

    `member' may be a file name or a ZipInfo object. The member is
    extracted below `path', or below the current working directory when
    `path' is None. `pwd' is passed on for decrypting the member.
    """
    destination = os.getcwd() if path is None else os.fspath(path)
    return self._extract_member(member, destination, pwd)
1616
def extractall(self, path=None, members=None, pwd=None):
    """Extract members from the archive, one after another.

    `path' is the directory to extract to; the current working directory
    is used when it is None. `members' is optional and must be a subset
    of the list returned by namelist(); by default every name from
    namelist() is extracted. `pwd' is passed on for each member.
    """
    if members is None:
        members = self.namelist()

    destination = os.getcwd() if path is None else os.fspath(path)

    for member in members:
        self._extract_member(member, destination, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001633
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001634 @classmethod
1635 def _sanitize_windows_name(cls, arcname, pathsep):
1636 """Replace bad characters and remove trailing dots from parts."""
1637 table = cls._windows_illegal_name_trans_table
1638 if not table:
1639 illegal = ':<>|"?*'
1640 table = str.maketrans(illegal, '_' * len(illegal))
1641 cls._windows_illegal_name_trans_table = table
1642 arcname = arcname.translate(table)
1643 # remove trailing dots
1644 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1645 # rejoin, removing empty parts.
1646 arcname = pathsep.join(x for x in arcname if x)
1647 return arcname
1648
def _extract_member(self, member, targetpath, pwd):
    """Extract 'member' to a physical file below the directory targetpath.

    `member` is a ZipInfo object or a member name (looked up with
    getinfo()). The archive name is turned into a relative path before it
    is joined to `targetpath`: drive/UNC prefix, empty components, "."
    and ".." are all removed. Missing parent directories are created.

    Returns the normalized path of the extracted file or directory.
    """
    if not isinstance(member, ZipInfo):
        member = self.getinfo(member)

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    invalid_path_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                               if x not in invalid_path_parts)
    if os.path.sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, os.path.sep)

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary. exist_ok=True keeps this
    # from failing when something else (another thread or process, e.g. a
    # parallel extraction) creates the directory between the existence
    # check and the makedirs() call.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs, exist_ok=True)

    if member.is_dir():
        if not os.path.isdir(targetpath):
            try:
                os.mkdir(targetpath)
            except FileExistsError:
                # Same race as above: only a directory that appeared in
                # the meantime is acceptable, anything else is an error.
                if not os.path.isdir(targetpath):
                    raise
        return targetpath

    with self.open(member, pwd=pwd) as source, \
         open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1690
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Warns about a duplicate member name, then raises ValueError if the
    archive mode does not allow writing or the archive is closed. The
    compression type is checked with _check_compression(). When ZIP64 is
    not allowed, LargeZipFile is raised if the file count, the file size
    or the header offset would need ZIP64 extensions.
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ('w', 'x', 'a'):
        raise ValueError("write() requires mode 'w', 'x', or 'a'")
    if not self.fp:
        raise ValueError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)

    if self._allowZip64:
        # With ZIP64 available none of the limits below apply.
        return

    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        reason = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        reason = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        reason = "Zipfile size"
    else:
        return
    raise LargeZipFile(reason +
                       " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001713
def write(self, filename, arcname=None,
          compress_type=None, compresslevel=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    For a regular file, `compress_type` and `compresslevel` override the
    archive-wide settings when they are not None, and the file contents
    are copied through a writing handle. For a directory only a header is
    written and both arguments are ignored.

    Raises ValueError if the archive is closed or a writing handle is
    already open.
    """
    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname,
                              strict_timestamps=self._strict_timestamps)

    if not zinfo.is_dir():
        # Regular file: settle the compression parameters, then stream
        # the data into the archive.
        zinfo.compress_type = (
            self.compression if compress_type is None else compress_type)
        zinfo._compresslevel = (
            self.compresslevel if compresslevel is None else compresslevel)
        with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
            shutil.copyfileobj(src, dest, 1024*8)
        return

    # Directory: there is no data, so only a header is written.
    zinfo.compress_size = 0
    zinfo.CRC = 0
    with self._lock:
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        # The central directory will now start after this header.
        self.start_dir = self.fp.tell()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001762
def writestr(self, zinfo_or_arcname, data,
             compress_type=None, compresslevel=None):
    """Write a file into the archive. The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When a name is given, a ZipInfo is created with the current local
    time and the archive-wide compression settings; a name ending in '/'
    gets directory attributes. `compress_type` and `compresslevel`
    override the settings on the ZipInfo when they are not None.

    Raises ValueError if the archive is closed or a writing handle is
    already open.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
        # endswith() rather than filename[-1], so that an empty name does
        # not fail here with an unrelated IndexError.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16  # drwxrwxr-x
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16  # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    if compresslevel is not None:
        zinfo._compresslevel = compresslevel

    zinfo.file_size = len(data)  # Uncompressed size
    with self._lock:
        with self.open(zinfo, mode='w') as dest:
            dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001803
1804 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001805 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001806 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001807
def close(self):
    """Close the file, and for mode 'w', 'x' and 'a' write the ending
    records.

    Does nothing if the archive is already closed. Raises ValueError if
    a writing handle is still open. The ending records are only written
    when the archive was modified. The underlying file object is released
    through self._fpclose() even if writing the records fails.
    """
    if self.fp is None:
        return

    if self._writing:
        raise ValueError("Can't close the ZIP file while there is "
                         "an open writing handle on it. "
                         "Close the writing handle before closing the zip.")

    try:
        writable = self.mode in ('w', 'x', 'a')
        if writable and self._didModify:  # write ending records
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        # Detach the file object first so the archive reads as closed,
        # then release it.
        handle, self.fp = self.fp, None
        self._fpclose(handle)
1829
def _write_end_record(self):
    """Write the central directory and the end-of-archive records.

    One central directory entry is written to self.fp for every item of
    self.filelist, at the current file position. The end-of-archive
    record and the archive comment follow, preceded by the ZIP64
    end-of-archive record and locator when a count, size or offset
    exceeds its limit. Raises LargeZipFile if those ZIP64 records are
    needed but self._allowZip64 is false.
    """
    for zinfo in self.filelist:  # write central directory
        # Pack the modification time into the 16-bit date and time words.
        stamp = zinfo.date_time
        dosdate = ((stamp[0] - 1980) << 9) | (stamp[1] << 5) | stamp[2]
        dostime = (stamp[3] << 11) | (stamp[4] << 5) | (stamp[5] // 2)

        # Values too large for the fixed-width fields are moved into a
        # ZIP64 extra field and replaced by 0xffffffff in the entry.
        zip64_values = []
        if (zinfo.file_size > ZIP64_LIMIT
                or zinfo.compress_size > ZIP64_LIMIT):
            zip64_values.append(zinfo.file_size)
            zip64_values.append(zinfo.compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
        else:
            file_size = zinfo.file_size
            compress_size = zinfo.compress_size

        if zinfo.header_offset > ZIP64_LIMIT:
            zip64_values.append(zinfo.header_offset)
            header_offset = 0xffffffff
        else:
            header_offset = zinfo.header_offset

        extra_data = zinfo.extra
        min_version = 0
        if zip64_values:
            # Put a fresh ZIP64 field (header id 1) in front of the
            # existing extra data, after stripping any field with id 1.
            extra_data = _strip_extra(extra_data, (1,))
            value_count = len(zip64_values)
            extra_data = struct.pack(
                '<HH' + 'Q'*value_count,
                1, 8*value_count, *zip64_values) + extra_data

            min_version = ZIP64_VERSION

        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
        except DeprecationWarning:
            # Dump the values that were being packed, then re-raise.
            print((structCentralDir, stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset), file=sys.stderr)
            raise
        self.fp.write(centdir)
        self.fp.write(filename)
        self.fp.write(extra_data)
        self.fp.write(zinfo.comment)

    directory_end = self.fp.tell()
    # Write end-of-zip-archive record
    entry_count = len(self.filelist)
    directory_size = directory_end - self.start_dir
    directory_offset = self.start_dir
    if entry_count > ZIP_FILECOUNT_LIMIT:
        zip64_reason = "Files count"
    elif directory_offset > ZIP64_LIMIT:
        zip64_reason = "Central directory offset"
    elif directory_size > ZIP64_LIMIT:
        zip64_reason = "Central directory size"
    else:
        zip64_reason = None

    if zip64_reason:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(zip64_reason +
                               " would require ZIP64 extensions")
        # NOTE(review): 44, 45, 45, 0, 0 are presumably the record size,
        # the two version fields and the two disk numbers -- confirm
        # against the structEndArchive64 layout.
        self.fp.write(struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, entry_count, entry_count,
            directory_size, directory_offset))

        self.fp.write(struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, directory_end, 1))
        # The classic record below is capped at its field maxima.
        entry_count = min(entry_count, 0xFFFF)
        directory_size = min(directory_size, 0xFFFFFFFF)
        directory_offset = min(directory_offset, 0xFFFFFFFF)

    endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, entry_count, entry_count,
                         directory_size, directory_offset, len(self._comment))
    self.fp.write(endrec)
    self.fp.write(self._comment)
    self.fp.flush()
1931
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001932 def _fpclose(self, fp):
1933 assert self._fileRefCnt > 0
1934 self._fileRefCnt -= 1
1935 if not self._fileRefCnt and not self._filePassed:
1936 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001937
1938
1939class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001940 """Class to create ZIP archives with Python library files and packages."""
1941
def __init__(self, file, mode="r", compression=ZIP_STORED,
             allowZip64=True, optimize=-1):
    """Initialise the underlying ZipFile and remember `optimize`.

    file, mode, compression and allowZip64 are handed to
    ZipFile.__init__ unchanged; optimize is stored as self._optimize.
    """
    base_options = {
        "mode": mode,
        "compression": compression,
        "allowZip64": allowZip64,
    }
    ZipFile.__init__(self, file, **base_options)
    self._optimize = optimize
1947
Christian Tismer59202e52013-10-21 03:59:23 +02001948 def writepy(self, pathname, basename="", filterfunc=None):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001949 """Add all files from "pathname" to the ZIP archive.
1950
Fred Drake484d7352000-10-02 21:14:52 +00001951 If pathname is a package directory, search the directory and
1952 all package subdirectories recursively for all *.py and enter
1953 the modules into the archive. If pathname is a plain
1954 directory, listdir *.py and enter all modules. Else, pathname
1955 must be a Python *.py file and the module will be put into the
Brett Cannonf299abd2015-04-13 14:21:02 -04001956 archive. Added modules are always module.pyc.
Fred Drake484d7352000-10-02 21:14:52 +00001957 This method will compile the module.py into module.pyc if
1958 necessary.
Christian Tismer59202e52013-10-21 03:59:23 +02001959 If filterfunc(pathname) is given, it is called with every argument.
1960 When it is False, the file or directory is skipped.
Fred Drake484d7352000-10-02 21:14:52 +00001961 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001962 pathname = os.fspath(pathname)
Christian Tismer59202e52013-10-21 03:59:23 +02001963 if filterfunc and not filterfunc(pathname):
1964 if self.debug:
Christian Tismer410d9312013-10-22 04:09:28 +02001965 label = 'path' if os.path.isdir(pathname) else 'file'
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001966 print('%s %r skipped by filterfunc' % (label, pathname))
Christian Tismer59202e52013-10-21 03:59:23 +02001967 return
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001968 dir, name = os.path.split(pathname)
1969 if os.path.isdir(pathname):
1970 initname = os.path.join(pathname, "__init__.py")
1971 if os.path.isfile(initname):
1972 # This is a package directory, add it
1973 if basename:
1974 basename = "%s/%s" % (basename, name)
1975 else:
1976 basename = name
1977 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001978 print("Adding package in", pathname, "as", basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001979 fname, arcname = self._get_codename(initname[0:-3], basename)
1980 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001981 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001982 self.write(fname, arcname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01001983 dirlist = sorted(os.listdir(pathname))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001984 dirlist.remove("__init__.py")
1985 # Add all *.py files and package subdirectories
1986 for filename in dirlist:
1987 path = os.path.join(pathname, filename)
1988 root, ext = os.path.splitext(filename)
1989 if os.path.isdir(path):
1990 if os.path.isfile(os.path.join(path, "__init__.py")):
1991 # This is a package directory, add it
Christian Tismer59202e52013-10-21 03:59:23 +02001992 self.writepy(path, basename,
1993 filterfunc=filterfunc) # Recursive call
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001994 elif ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02001995 if filterfunc and not filterfunc(path):
1996 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001997 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02001998 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001999 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02002000 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002001 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002002 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002003 self.write(fname, arcname)
2004 else:
2005 # This is NOT a package directory, add its files at top level
2006 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002007 print("Adding files from directory", pathname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01002008 for filename in sorted(os.listdir(pathname)):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002009 path = os.path.join(pathname, filename)
2010 root, ext = os.path.splitext(filename)
2011 if ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02002012 if filterfunc and not filterfunc(path):
2013 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03002014 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02002015 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002016 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02002017 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002018 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002019 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002020 self.write(fname, arcname)
2021 else:
2022 if pathname[-3:] != ".py":
Collin Winterce36ad82007-08-30 01:19:48 +00002023 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02002024 'Files added with writepy() must end with ".py"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002025 fname, arcname = self._get_codename(pathname[0:-3], basename)
2026 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002027 print("Adding file", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002028 self.write(fname, arcname)
2029
2030 def _get_codename(self, pathname, basename):
2031 """Return (filename, archivename) for the path.
2032
Fred Drake484d7352000-10-02 21:14:52 +00002033 Given a module name path, return the correct file path and
2034 archive name, compiling if necessary. For example, given
2035 /python/lib/string, return (/python/lib/string.pyc, string).
2036 """
Georg Brandl8334fd92010-12-04 10:26:46 +00002037 def _compile(file, optimize=-1):
2038 import py_compile
2039 if self.debug:
2040 print("Compiling", file)
2041 try:
2042 py_compile.compile(file, doraise=True, optimize=optimize)
Serhiy Storchaka45c43752013-01-29 20:10:28 +02002043 except py_compile.PyCompileError as err:
Georg Brandl8334fd92010-12-04 10:26:46 +00002044 print(err.msg)
2045 return False
2046 return True
2047
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002048 file_py = pathname + ".py"
2049 file_pyc = pathname + ".pyc"
Brett Cannonf299abd2015-04-13 14:21:02 -04002050 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2051 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2052 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
Georg Brandl8334fd92010-12-04 10:26:46 +00002053 if self._optimize == -1:
2054 # legacy mode: use whatever file is present
Brett Cannonf299abd2015-04-13 14:21:02 -04002055 if (os.path.isfile(file_pyc) and
Georg Brandl8334fd92010-12-04 10:26:46 +00002056 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2057 # Use .pyc file.
2058 arcname = fname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002059 elif (os.path.isfile(pycache_opt0) and
2060 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
Georg Brandl8334fd92010-12-04 10:26:46 +00002061 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2062 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002063 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002064 arcname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002065 elif (os.path.isfile(pycache_opt1) and
2066 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2067 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002068 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002069 fname = pycache_opt1
2070 arcname = file_pyc
2071 elif (os.path.isfile(pycache_opt2) and
2072 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2073 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2074 # file name in the archive.
2075 fname = pycache_opt2
2076 arcname = file_pyc
Barry Warsaw28a691b2010-04-17 00:19:56 +00002077 else:
Georg Brandl8334fd92010-12-04 10:26:46 +00002078 # Compile py into PEP 3147 pyc file.
2079 if _compile(file_py):
Brett Cannonf299abd2015-04-13 14:21:02 -04002080 if sys.flags.optimize == 0:
2081 fname = pycache_opt0
2082 elif sys.flags.optimize == 1:
2083 fname = pycache_opt1
2084 else:
2085 fname = pycache_opt2
2086 arcname = file_pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002087 else:
2088 fname = arcname = file_py
2089 else:
2090 # new mode: use given optimization level
2091 if self._optimize == 0:
Brett Cannonf299abd2015-04-13 14:21:02 -04002092 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002093 arcname = file_pyc
2094 else:
Brett Cannonf299abd2015-04-13 14:21:02 -04002095 arcname = file_pyc
2096 if self._optimize == 1:
2097 fname = pycache_opt1
2098 elif self._optimize == 2:
2099 fname = pycache_opt2
2100 else:
2101 msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2102 raise ValueError(msg)
Georg Brandl8334fd92010-12-04 10:26:46 +00002103 if not (os.path.isfile(fname) and
2104 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2105 if not _compile(file_py, optimize=self._optimize):
2106 fname = arcname = file_py
Barry Warsaw28a691b2010-04-17 00:19:56 +00002107 archivename = os.path.split(arcname)[1]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002108 if basename:
2109 archivename = "%s/%s" % (basename, archivename)
2110 return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002111
2112
def _unique_everseen(iterable, key=None):
    """Yield each element of *iterable* the first time it is seen.

    Order is preserved.  When *key* is given, two elements count as
    duplicates if key(element) is equal; the first element with a given
    key is the one yielded.
    """
    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
    # unique_everseen('ABBCcAD', str.lower) --> A B C D
    observed = set()
    for item in iterable:
        marker = item if key is None else key(item)
        if marker not in observed:
            observed.add(marker)
            yield item
2129
2130
def _parents(path):
    """
    Generate every parent of a posixpath.sep-separated path,
    nearest parent first; the path itself is not included.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    lineage = _ancestry(path)
    # The first item of the lineage is the path itself; skip it.
    return itertools.islice(lineage, 1, None)
2148
2149
def _ancestry(path):
    """
    Given a path with elements separated by
    posixpath.sep, generate all elements of that path,
    starting with the path itself (minus trailing separators)
    and ending with its top-most component.

    >>> list(_ancestry('b/d'))
    ['b/d', 'b']
    >>> list(_ancestry('/b/d/'))
    ['/b/d', '/b']
    >>> list(_ancestry('b/d/f/'))
    ['b/d/f', 'b/d', 'b']
    >>> list(_ancestry('b'))
    ['b']
    >>> list(_ancestry(''))
    []

    Runs of separators are treated like a single separator.

    >>> list(_ancestry('//b//d///f//'))
    ['//b//d///f', '//b//d', '//b']
    """
    path = path.rstrip(posixpath.sep)
    # Stop once only separators remain.  Comparing against a single
    # posixpath.sep is not enough: posixpath.split('//') returns '//'
    # again, so a name such as '//b' would otherwise loop forever.
    while path.rstrip(posixpath.sep):
        yield path
        path, tail = posixpath.split(path)
2170
2171
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """Wrap *root* (a ZipFile, or anything ZipFile() accepts) at *at*.

        *at* is the position inside the archive; the empty string
        denotes the archive root and directories end with "/".
        """
        self.root = root if isinstance(root, ZipFile) else ZipFile(root)
        self.at = at

    @property
    def open(self):
        """A callable that opens this entry via the underlying ZipFile."""
        return functools.partial(self.root.open, self.at)

    @property
    def name(self):
        """The final path component, without any trailing slash."""
        return posixpath.basename(self.at.rstrip("/"))

    def read_text(self, *args, **kwargs):
        """Return the entry decoded as text.

        Positional and keyword arguments are passed on to
        io.TextIOWrapper.
        """
        with self.open() as strm:
            return io.TextIOWrapper(strm, *args, **kwargs).read()

    def read_bytes(self):
        """Return the raw bytes of the entry."""
        with self.open() as strm:
            return strm.read()

    def _is_child(self, path):
        # True when *path* sits directly inside this directory.
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        # Build a sibling Path on the same archive.
        return Path(self.root, at)

    def is_dir(self):
        """Return True for the archive root or a name ending in "/"."""
        return not self.at or self.at.endswith("/")

    def is_file(self):
        """Return True when this path is not a directory."""
        return not self.is_dir()

    def exists(self):
        """Return True if this path is among the archive's names,
        counting directories implied by deeper entries."""
        return self.at in self._names()

    def iterdir(self):
        """Iterate over the immediate children of this directory.

        Raises ValueError when called on a file.
        """
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self._names())
        return filter(self._is_child, subs)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, add):
        """Return the Path for *add* relative to this one.

        If only the directory form ("<joined>/") is present in the
        archive, that form is used so the result reports is_dir().
        """
        next = posixpath.join(self.at, add)
        next_dir = posixpath.join(self.at, add, "")
        names = self._names()
        return self._next(next_dir if next not in names and next_dir in names else next)

    __truediv__ = joinpath

    @staticmethod
    def _implied_dirs(names):
        """Yield, once each, the "dir/" names that are parents of
        entries in *names* but are not themselves listed."""
        # Build the lookup set once: testing membership against the
        # list for every parent of every name is quadratic in the
        # number of archive entries.
        known = set(names)
        return _unique_everseen(
            parent + "/"
            for name in names
            for parent in _parents(name)
            if parent + "/" not in known
        )

    @classmethod
    def _add_implied_dirs(cls, names):
        # Explicit names first, then any directories they imply.
        return names + list(cls._implied_dirs(names))

    @property
    def parent(self):
        """The Path of the containing directory (the root for
        top-level entries)."""
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)

    def _names(self):
        # All names in the archive, including implied directories.
        return self._add_implied_dirs(self.root.namelist())
2316
2317
def main(args=None):
    """Run the zipfile command-line interface.

    *args* is the argument list to parse; None lets argparse use
    sys.argv.  Exactly one of -l/--list, -e/--extract, -c/--create or
    -t/--test must be supplied.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for zipfile module.')
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    group.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    group.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    args = parser.parse_args(args)

    if args.test is not None:
        with ZipFile(args.test, 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")
        return

    if args.list is not None:
        with ZipFile(args.list, 'r') as zf:
            zf.printdir()
        return

    if args.extract is not None:
        archive_name, target_dir = args.extract
        with ZipFile(archive_name, 'r') as zf:
            zf.extractall(target_dir)
        return

    if args.create is None:
        return

    # First value is the archive to create, the rest are its sources.
    zip_name, *sources = args.create

    def add_to_zip(archive, source, arc_path):
        # Files are deflated; directories are recorded (when they have
        # an archive name) and then walked in sorted order.
        if os.path.isfile(source):
            archive.write(source, arc_path, ZIP_DEFLATED)
            return
        if not os.path.isdir(source):
            # Neither a file nor a directory: ignore.
            return
        if arc_path:
            archive.write(source, arc_path)
        for entry in sorted(os.listdir(source)):
            add_to_zip(archive,
                       os.path.join(source, entry),
                       os.path.join(arc_path, entry))

    with ZipFile(zip_name, 'w') as zf:
        for source in sources:
            arc_path = os.path.basename(source)
            if not arc_path:
                # Trailing separator: fall back to the directory's own name.
                arc_path = os.path.basename(os.path.dirname(source))
            if arc_path in ('', os.curdir, os.pardir):
                arc_path = ''
            add_to_zip(zf, source, arc_path)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002377
# Run the command-line interface when this module is executed as a script.
if __name__ == "__main__":
    main()