blob: 2da87ef505e6ecd6c8c8ffb227184dd1e4720ef9 [file] [log] [blame]
"""
Read and write ZIP files.

XXX references to utf-8 need further investigation.
"""
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04006import binascii
7import functools
8import importlib.util
Antoine Pitroua32f9a22010-01-27 21:18:57 +00009import io
shireenraoa4e29912019-08-24 11:26:41 -040010import itertools
Barry Warsaw28a691b2010-04-17 00:19:56 +000011import os
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040012import posixpath
Barry Warsaw28a691b2010-04-17 00:19:56 +000013import shutil
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040014import stat
Barry Warsaw28a691b2010-04-17 00:19:56 +000015import struct
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040016import sys
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020017import threading
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040018import time
Guido van Rossum32abe6f2000-03-31 17:30:02 +000019
# Optional compression back ends.  Each module name is bound to None when the
# import fails, so the rest of the module can test "if not zlib:" etc. before
# using a compression method.
try:
    import zlib  # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    # Without zlib, fall back to binascii's CRC-32 implementation.
    crc32 = binascii.crc32

try:
    import bz2  # We may need its compression method
except ImportError:
    bz2 = None

try:
    import lzma  # We may need its compression method
except ImportError:
    lzma = None
36
# Public names exported by a star-import of this module.  "BadZipfile" and
# "error" are the older spellings of BadZipFile kept for compatibility.
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000040
class BadZipFile(Exception):
    """Error raised for ZIP data this module cannot handle.

    Within this module it is raised, for example, for corrupt extra fields
    and for archives that span multiple disks.
    """
Thomas Wouters0e3f5912006-08-11 14:57:12 +000043
44
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that would require ZIP64 extensions
    while those extensions are disabled.
    """
50
# Pre-3.2 compatibility names: both aliases refer to the very same class
# object as BadZipFile, so "except error:" / "except BadZipfile:" still work.
error = BadZipfile = BadZipFile
52
Guido van Rossum32abe6f2000-03-31 17:30:02 +000053
# Largest file/compressed size (2**31 - 1) written without ZIP64; sizes above
# this make ZipInfo.FileHeader() switch to, or demand, the ZIP64 extension.
ZIP64_LIMIT = (1 << 31) - 1
# Largest value that fits in an unsigned 16-bit field (65535).
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
# The archive comment length is stored in an unsigned 16-bit field.
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "Version needed to extract" values.  ZipInfo.FileHeader() raises the
# entry's extract/create version to at least the value required by the
# features (ZIP64, bzip2, LZMA) the entry uses.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020071
Martin v. Löwisb09b8442008-07-03 14:13:42 +000072# Below are some formats and associated data for reading/writing headers using
73# the struct module. The names and structures of headers/records are those used
74# in the PKWARE description of the ZIP file format:
75# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
76# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000077
Martin v. Löwisb09b8442008-07-03 14:13:42 +000078# The "end of central directory" structure, magic number, size, and indices
79# (section V.I in the format document)
# Little-endian, unpadded layout: 4-byte signature, four unsigned 16-bit
# fields, two unsigned 32-bit fields and one unsigned 16-bit field
# (22 bytes in total).
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields in the list produced by unpacking structEndArchive.
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience:
# _EndRecData() appends the comment bytes and the record's file offset.
_ECD_COMMENT = 8
_ECD_LOCATION = 9
96
97# The "central directory" structure, magic number, size, and indices
98# of entries in the structure (section V.F in the format document)
# Little-endian, unpadded layout of one central directory entry header:
# 4-byte signature, four bytes, four 16-bit fields, one 32-bit field,
# two 32-bit fields, five 16-bit fields and two 32-bit fields (46 bytes).
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
123
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000124# The "local file header" structure, magic number, size, and indices
125# (section V.A in the format document)
# Little-endian, unpadded layout of the fixed part of a local file header:
# 4-byte signature, two bytes, four 16-bit fields, one 32-bit field,
# two 32-bit fields and two 16-bit fields (30 bytes).  ZipInfo.FileHeader()
# packs this structure and appends the file name and extra data to it.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields in a tuple unpacked with structFileHeader.
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
142
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000143# The "Zip64 end of central directory locator" structure, magic number, and size
# Layout: 4-byte signature, 32-bit field, 64-bit field, 32-bit field
# (20 bytes).  _EndRecData64() unpacks these as signature, disk number,
# relative offset and number of disks.
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# Layout: 4-byte signature, one 64-bit field, two 16-bit fields, two 32-bit
# fields and four 64-bit fields (56 bytes).
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields in a tuple unpacked with structEndArchive64.
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

# Packed little-endian this value is the byte string b"PK\x07\x08".
# NOTE(review): presumably the data-descriptor signature -- it is not used
# in the part of the file visible here; confirm against its users.
_DD_SIGNATURE = 0x08074b50
166
Serhiy Storchaka9bdb7be2018-09-17 15:36:40 +0300167_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
168
169def _strip_extra(extra, xids):
170 # Remove Extra Fields with specified IDs.
171 unpack = _EXTRA_FIELD_STRUCT.unpack
172 modified = False
173 buffer = []
174 start = i = 0
175 while i + 4 <= len(extra):
176 xid, xlen = unpack(extra[i : i + 4])
177 j = i + 4 + xlen
178 if xid in xids:
179 if i != start:
180 buffer.append(extra[start : i])
181 start = j
182 modified = True
183 i = j
184 if not modified:
185 return extra
186 return b''.join(buffer)
187
def _check_zipfile(fp):
    """Return True if an "end of central directory" record is found in *fp*.

    An OSError raised while examining *fp* is treated as "not a ZIP file".
    """
    try:
        # A truthy result means the file has the correct magic number.
        found = True if _EndRecData(fp) else False
    except OSError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000195
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path, or an already open file or
    file-like object (anything with a "read" attribute).  Any OSError
    raised while opening or examining the file is swallowed; the value
    determined so far (False by default) is returned.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and make sure it gets closed.
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except OSError:
        pass
    return verdict
211
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    *offset* is the position of the regular end-of-central-directory record
    relative to the end of the file.  *endrec* is the list built from that
    record; it is updated in place and returned.  It is returned unchanged
    when no valid ZIP64 locator/record pair precedes the regular record.
    Raises BadZipFile for archives that span multiple disks.
    """
    locator_offset = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_offset, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, _reloff, disks = struct.unpack(
        structEndArchive64Locator, locator)
    if magic != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data': the ZIP64 record is then located
    # immediately before the locator.
    fpin.seek(locator_offset - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    (magic, _record_size, _create_version, _read_version,
     disk_num, disk_dir, entries_here, entries_total,
     directory_size, directory_offset) = struct.unpack(
         structEndArchive64, record)
    if magic != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    overrides = (
        (_ECD_SIGNATURE, magic),
        (_ECD_DISK_NUMBER, disk_num),
        (_ECD_DISK_START, disk_dir),
        (_ECD_ENTRIES_THIS_DISK, entries_here),
        (_ECD_ENTRIES_TOTAL, entries_total),
        (_ECD_SIZE, directory_size),
        (_ECD_OFFSET, directory_offset),
    )
    for index, value in overrides:
        endrec[index] = value
    return endrec
253
254
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.
    The list is passed through _EndRecData64() so that ZIP64 values, when
    present, replace the regular ones."""

    # Determine file size
    fpin.seek(0, 2)
    total_size = fpin.tell()

    # Fast path: a ZIP file with no archive comment ends with the
    # "end of central directory" structure itself.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    tail_is_record = (len(tail) == sizeEndCentDir and
                      tail[0:4] == stringEndArchive and
                      tail[-2:] == b"\000\000")
    if tail_is_record:
        # The signature is correct and there's no comment: unpack the
        # structure, then append a blank comment and the record start offset.
        fields = [*struct.unpack(structEndArchive, tail),
                  b"", total_size - sizeEndCentDir]

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, fields)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature.  The comment is the last item in the ZIP file and may
    # be up to 64K long.  It is assumed that the "end of central directory"
    # magic number does not appear in the comment.
    search_from = max(total_size - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(search_from, 0)
    window = fpin.read()
    sig_at = window.rfind(stringEndArchive)
    if sig_at < 0:
        # Unable to find a valid end of central directory structure
        return None

    # Found the magic number; attempt to unpack and interpret
    record_end = sig_at + sizeEndCentDir
    raw_record = window[sig_at:record_end]
    if len(raw_record) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    fields = list(struct.unpack(structEndArchive, raw_record))
    declared_size = fields[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    fields.append(window[record_end:record_end + declared_size])
    record_offset = search_from + sig_at
    fields.append(record_offset)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, record_offset - total_size, fields)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000314
Fred Drake484d7352000-10-02 21:14:52 +0000315
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named *filename* with modification time *date_time*.

        *filename* is normalized: it is cut at the first null character and
        the platform path separator is replaced by "/".  *date_time* is a
        (year, month, day, hour, minute, second) sequence.

        Raises ValueError if the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        self.compress_size = 0          # Size of the compressed file
        self.file_size = 0              # Size of the uncompressed file
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file

    def __repr__(self):
        """Return a short description listing only the non-default details."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The upper 16 bits of external_attr are shown as a file mode,
        # the lower 16 bits as raw attribute flags.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        *zip64* forces (True) or forbids (False) the ZIP64 extra field; when
        None it is used only if one of the sizes exceeds ZIP64_LIMIT.

        As a side effect, extract_version and create_version are raised to
        the minimum required by the features the entry uses.

        Raises LargeZipFile if a size exceeds ZIP64_LIMIT while *zip64* is
        false.
        """
        dt = self.date_time
        # Pack the timestamp into the 16-bit MS-DOS date and time fields
        # (seconds are stored with a two second resolution).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # Append a ZIP64 extra field (ID 1) carrying both 64-bit sizes.
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        The name is encoded as ASCII when possible; otherwise it is encoded
        as UTF-8 and bit 0x800 is set in the returned flag bits.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Decode the extra field and apply any ZIP64 values it contains.

        For a ZIP64 record (ID 0x0001), each of file_size, compress_size and
        header_offset that currently holds its "see ZIP64" placeholder value
        is replaced, in that order, by the next 64-bit value of the record.

        Raises BadZipFile if a record claims more data than is available or
        if the ZIP64 record is too short for a value it must provide.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                data = extra[4:ln+4]
                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF):
                        field = "File size"
                        self.file_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.compress_size == 0xFFFF_FFFF:
                        field = "Compress size"
                        self.compress_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.header_offset == 0xFFFF_FFFF:
                        field = "Header offset"
                        self.header_offset, = unpack('<Q', data[:8])
                except struct.error:
                    raise BadZipFile(f"Corrupt zip64 extra field. "
                                     f"{field} not found.") from None

            # Advance to the next record.
            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).

        When strict_timestamps is false, a modification year before 1980 is
        replaced by 1980-01-01 00:00:00 and a year after 2107 by
        2107-12-31 23:59:59.
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory.

        A member is a directory when its name ends with "/"; an empty
        name is not a directory.
        """
        # endswith() instead of filename[-1] so that an empty file name
        # gives False rather than raising IndexError.
        return self.filename.endswith('/')
529
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000530
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300531# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
532# internal keys. We noticed that a direct implementation is faster than
533# relying on binascii.crc32().
Thomas Wouterscf297e42007-02-23 15:07:44 +0000534
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300535_crctable = None
536def _gen_crc(crc):
537 for j in range(8):
538 if crc & 1:
539 crc = (crc >> 1) ^ 0xEDB88320
540 else:
541 crc >>= 1
542 return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000543
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300544# ZIP supports a password-based form of encryption. Even though known
545# plaintext attacks have been found against it, it is still useful
546# to be able to get data out of such a file.
547#
548# Usage:
549# zd = _ZipDecrypter(mypwd)
550# plain_bytes = zd(cypher_bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000551
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300552def _ZipDecrypter(pwd):
553 key0 = 305419896
554 key1 = 591751049
555 key2 = 878082192
Thomas Wouterscf297e42007-02-23 15:07:44 +0000556
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300557 global _crctable
558 if _crctable is None:
559 _crctable = list(map(_gen_crc, range(256)))
560 crctable = _crctable
Thomas Wouterscf297e42007-02-23 15:07:44 +0000561
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300562 def crc32(ch, crc):
Thomas Wouterscf297e42007-02-23 15:07:44 +0000563 """Compute the CRC32 primitive on one byte."""
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300564 return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000565
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300566 def update_keys(c):
567 nonlocal key0, key1, key2
568 key0 = crc32(c, key0)
569 key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
570 key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
571 key2 = crc32(key1 >> 24, key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000572
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300573 for p in pwd:
574 update_keys(p)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000575
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300576 def decrypter(data):
577 """Decrypt a bytes object."""
578 result = bytearray()
579 append = result.append
580 for c in data:
581 k = key2 | 2
582 c ^= ((k * (k^1)) >> 8) & 0xFF
583 update_keys(c)
584 append(c)
585 return bytes(result)
586
587 return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000588
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200589
class LZMACompressor:
    """Raw LZMA1 compressor whose output starts with a small header.

    The header is emitted together with the first output produced: the
    bytes 9 and 4, the 16-bit little-endian length of the encoded filter
    properties, then the properties themselves.
    """

    def __init__(self):
        # The real compressor is created lazily on first use.
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the stream header."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*, prefixing the header on the first call."""
        header = b'' if self._comp is not None else self._init()
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream, prefixing the header if nothing was written."""
        header = b'' if self._comp is not None else self._init()
        return header + self._comp.flush()
611
612
class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by a properties header.

    Input is buffered until the 4-byte header, the filter properties it
    announces and at least one further byte are available; only then is
    the underlying decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Feed *data* and return whatever decompressed bytes are available."""
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            # Bytes 2-3 hold the length of the encoded filter properties.
            psize, = struct.unpack('<H', pending[2:4])
            payload_start = 4 + psize
            if len(pending) <= payload_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:payload_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[payload_start:]
            # The buffer is no longer needed once the header is parsed.
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
639
640
# Human-readable names for ZIP compression method numbers.  Used by
# ZipInfo.__repr__() and when reporting an unsupported compression type;
# only some of these methods can actually be read or written by this module.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
660
def _check_compression(compression):
    """Check that the compression method *compression* can be used.

    Returns None on success.  Raises RuntimeError if the method is known
    but the module implementing it could not be imported, and
    NotImplementedError for any other method.
    """
    if compression == ZIP_STORED:
        return

    requirements = (
        (ZIP_DEFLATED, zlib, "Compression requires the (missing) zlib module"),
        (ZIP_BZIP2, bz2, "Compression requires the (missing) bz2 module"),
        (ZIP_LZMA, lzma, "Compression requires the (missing) lzma module"),
    )
    for method, module, complaint in requirements:
        if compression == method:
            if not module:
                raise RuntimeError(complaint)
            return

    raise NotImplementedError("That compression method is not supported")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200678
679
def _get_compressor(compress_type, compresslevel=None):
    """Return a new compressor object for *compress_type*.

    *compresslevel* of None selects the backend's default level.  Returns
    None for any type other than ZIP_DEFLATED, ZIP_BZIP2 and ZIP_LZMA.
    """
    if compress_type == ZIP_DEFLATED:
        if compresslevel is None:
            level = zlib.Z_DEFAULT_COMPRESSION
        else:
            level = compresslevel
        # Negative wbits is passed exactly as the original code did.
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()
    return None
694
695
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    Returns None for ZIP_STORED (no decompression needed).

    Raises:
        NotImplementedError: the method is not one this module can decode.
            The message names the method when it appears in
            ``compressor_names``.
        RuntimeError: the method is supported but its backing module
            (zlib, bz2 or lzma) is missing (raised by _check_compression).
    """
    # Report unsupported methods first.  Calling _check_compression() up
    # front would raise its generic NotImplementedError for every unknown
    # id, which made the descriptive message below unreachable.
    if compress_type not in (ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2, ZIP_LZMA):
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError(
                "compression type %d (%s)" % (compress_type, descr))
        raise NotImplementedError("compression type %d" % (compress_type,))

    # Supported method: make sure the module implementing it is available.
    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    return LZMADecompressor()
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200712
713
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200714class _SharedFile:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300715 def __init__(self, file, pos, close, lock, writing):
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200716 self._file = file
717 self._pos = pos
718 self._close = close
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200719 self._lock = lock
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300720 self._writing = writing
John Jolly066df4f2018-01-30 01:51:35 -0700721 self.seekable = file.seekable
722 self.tell = file.tell
723
724 def seek(self, offset, whence=0):
725 with self._lock:
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200726 if self._writing():
John Jolly066df4f2018-01-30 01:51:35 -0700727 raise ValueError("Can't reposition in the ZIP file while "
728 "there is an open writing handle on it. "
729 "Close the writing handle before trying to read.")
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200730 self._file.seek(offset, whence)
John Jolly066df4f2018-01-30 01:51:35 -0700731 self._pos = self._file.tell()
732 return self._pos
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200733
734 def read(self, n=-1):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200735 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300736 if self._writing():
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300737 raise ValueError("Can't read from the ZIP file while there "
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300738 "is an open writing handle on it. "
739 "Close the writing handle before trying to read.")
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200740 self._file.seek(self._pos)
741 data = self._file.read(n)
742 self._pos = self._file.tell()
743 return data
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200744
745 def close(self):
746 if self._file is not None:
747 fileobj = self._file
748 self._file = None
749 self._close(fileobj)
750
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200751# Provide the tell method for unseekable stream
752class _Tellable:
753 def __init__(self, fp):
754 self.fp = fp
755 self.offset = 0
756
757 def write(self, data):
758 n = self.fp.write(data)
759 self.offset += n
760 return n
761
762 def tell(self):
763 return self.offset
764
765 def flush(self):
766 self.fp.flush()
767
768 def close(self):
769 self.fp.close()
770
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200771
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.

    Is returned by ZipFile.open().

    Decompressed data is served from an internal buffer (``_readbuffer``
    with read cursor ``_offset``).  ``_compress_left`` counts compressed
    bytes still to be read from the underlying file and ``_left`` counts
    decompressed bytes still to be produced.
    """

    # Max size supported by decompressor.
    # Parenthesised on purpose: "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, pwd=None,
                 close_fileobj=False):
        """Set up reading of the member described by *zipinfo*.

        *fileobj* must already be positioned at the start of the member's
        (possibly encrypted) data.  If *pwd* is true, the encryption header
        is read and checked.  If *close_fileobj* is true, close() also
        closes *fileobj*.

        Raises RuntimeError if *pwd* does not match the member's check byte.
        """
        self._fileobj = fileobj
        self._pwd = pwd
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        # Remember the starting state so seek() can rewind and re-read.
        # AttributeError covers file objects without seekable()/tell(); it
        # is also what happens when zipinfo has no CRC, because
        # _running_crc is then never set -- the member stays non-seekable.
        self._seekable = False
        try:
            if fileobj.seekable():
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass

        self._decrypter = None
        if pwd:
            if zipinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zipinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zipinfo.CRC >> 24) & 0xff
            h = self._init_decrypter()
            if h != check_byte:
                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)

    def _init_decrypter(self):
        """Create the decrypter and consume the 12-byte encryption header.

        Returns the decrypted 12th header byte (the password check byte).
        """
        self._decrypter = _ZipDecrypter(self._pwd)
        # The first 12 bytes in the cypher stream is an encryption header
        #  used to strengthen the algorithm. The first 11 bytes are
        #  completely random, while the 12th contains the MSB of the CRC,
        #  or the MSB of the file time depending on the header type
        #  and is used to check the correctness of the password.
        header = self._fileobj.read(12)
        self._compress_left -= 12
        return self._decrypter(header)[11]

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                # Fall back to the numeric id for unnamed methods.
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            # read() consumes the data, so push the chunk back in front of
            # whatever is still unread in the buffer.
            chunk = self.read(n)
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True; raise ValueError if the file is closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.

        Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("read from closed file.")
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        # Drain the buffer, then keep reading until n more bytes or EOF.
        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than requested: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        """Fold *newdata* into the running CRC; verify it once at EOF.

        Raises BadZipFile if the final CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            # Loop only until some data arrives (a read may yield nothing).
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    # Surplus stays buffered for the next call.
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        """Read (and decrypt, if needed) raw member bytes from the file.

        Reads at least MIN_READ_SIZE bytes but never past the end of the
        member.  Raises EOFError if the file ends prematurely.
        """
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        """Close this object, and the underlying file if it is owned."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        """Return whether seek()/tell() work; raise ValueError if closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return self._seekable

    def seek(self, offset, whence=0):
        """Move to a position in the decompressed data; return that position.

        The target is clamped to the range [0, file size].  Seeking
        backwards past the buffer restarts decompression from the beginning
        of the member; seeking forwards reads and discards data.

        Raises ValueError if closed or *whence* is invalid, and
        io.UnsupportedOperation if the underlying stream is not seekable.
        """
        if self.closed:
            raise ValueError("seek on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        # Skip forward by reading and discarding in bounded chunks.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        """Return the current position in the decompressed data.

        Raises ValueError if closed, and io.UnsupportedOperation if the
        underlying stream is not seekable.
        """
        if self.closed:
            raise ValueError("tell on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        # Bytes produced so far, minus what is still unread in the buffer.
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos
1099
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001100
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001101class _ZipWriteFile(io.BufferedIOBase):
1102 def __init__(self, zf, zinfo, zip64):
1103 self._zinfo = zinfo
1104 self._zip64 = zip64
1105 self._zipfile = zf
Bo Baylesce237c72018-01-29 23:54:07 -06001106 self._compressor = _get_compressor(zinfo.compress_type,
1107 zinfo._compresslevel)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001108 self._file_size = 0
1109 self._compress_size = 0
1110 self._crc = 0
1111
1112 @property
1113 def _fileobj(self):
1114 return self._zipfile.fp
1115
1116 def writable(self):
1117 return True
1118
1119 def write(self, data):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001120 if self.closed:
1121 raise ValueError('I/O operation on closed file.')
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001122 nbytes = len(data)
1123 self._file_size += nbytes
1124 self._crc = crc32(data, self._crc)
1125 if self._compressor:
1126 data = self._compressor.compress(data)
1127 self._compress_size += len(data)
1128 self._fileobj.write(data)
1129 return nbytes
1130
1131 def close(self):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001132 if self.closed:
1133 return
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001134 try:
1135 super().close()
1136 # Flush any data from the compressor, and update header info
1137 if self._compressor:
1138 buf = self._compressor.flush()
1139 self._compress_size += len(buf)
1140 self._fileobj.write(buf)
1141 self._zinfo.compress_size = self._compress_size
1142 else:
1143 self._zinfo.compress_size = self._file_size
1144 self._zinfo.CRC = self._crc
1145 self._zinfo.file_size = self._file_size
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001146
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001147 # Write updated header info
1148 if self._zinfo.flag_bits & 0x08:
1149 # Write CRC and file sizes after the file data
1150 fmt = '<LLQQ' if self._zip64 else '<LLLL'
1151 self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
1152 self._zinfo.compress_size, self._zinfo.file_size))
1153 self._zipfile.start_dir = self._fileobj.tell()
1154 else:
1155 if not self._zip64:
1156 if self._file_size > ZIP64_LIMIT:
1157 raise RuntimeError(
1158 'File size unexpectedly exceeded ZIP64 limit')
1159 if self._compress_size > ZIP64_LIMIT:
1160 raise RuntimeError(
1161 'Compressed size unexpectedly exceeded ZIP64 limit')
1162 # Seek backwards and write file header (which will now include
1163 # correct CRC and file sizes)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001164
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001165 # Preserve current position in file
1166 self._zipfile.start_dir = self._fileobj.tell()
1167 self._fileobj.seek(self._zinfo.header_offset)
1168 self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1169 self._fileobj.seek(self._zipfile.start_dir)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001170
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001171 # Successfully written: Add file to our caches
1172 self._zipfile.filelist.append(self._zinfo)
1173 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1174 finally:
1175 self._zipfile._writing = False
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001176
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001177
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001178
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001179class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001180 """ Class with methods to open, read, write, close, list zip files.
1181
Bo Baylesce237c72018-01-29 23:54:07 -06001182 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1183 compresslevel=None)
Tim Petersa19a1682001-03-29 04:36:09 +00001184
Fred Drake3d9091e2001-03-26 15:49:24 +00001185 file: Either the path to the file, or a file-like object.
1186 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001187 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1188 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001189 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1190 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001191 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1192 needed, otherwise it will raise an exception when this would
1193 be necessary.
Bo Baylesce237c72018-01-29 23:54:07 -06001194 compresslevel: None (default for the given compression type) or an integer
1195 specifying the level to pass to the compressor.
1196 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1197 When using ZIP_DEFLATED integers 0 through 9 are accepted.
1198 When using ZIP_BZIP2 integers 1 through 9 are accepted.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001199
Fred Drake3d9091e2001-03-26 15:49:24 +00001200 """
Fred Drake484d7352000-10-02 21:14:52 +00001201
Fred Drake90eac282001-02-28 05:29:34 +00001202 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001203 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001204
Bo Baylesce237c72018-01-29 23:54:07 -06001205 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
Marcel Plch77b112c2018-08-31 16:43:31 +02001206 compresslevel=None, *, strict_timestamps=True):
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001207 """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
1208 or append 'a'."""
1209 if mode not in ('r', 'w', 'x', 'a'):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001210 raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001211
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001212 _check_compression(compression)
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001213
1214 self._allowZip64 = allowZip64
1215 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +00001216 self.debug = 0 # Level of printing: 0 through 3
1217 self.NameToInfo = {} # Find file info given name
1218 self.filelist = [] # List of ZipInfo instances for archive
1219 self.compression = compression # Method of compression
Bo Baylesce237c72018-01-29 23:54:07 -06001220 self.compresslevel = compresslevel
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001221 self.mode = mode
Thomas Wouterscf297e42007-02-23 15:07:44 +00001222 self.pwd = None
R David Murrayf50b38a2012-04-12 18:44:58 -04001223 self._comment = b''
Marcel Plch77b112c2018-08-31 16:43:31 +02001224 self._strict_timestamps = strict_timestamps
Tim Petersa19a1682001-03-29 04:36:09 +00001225
Fred Drake3d9091e2001-03-26 15:49:24 +00001226 # Check if we were passed a file-like object
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001227 if isinstance(file, os.PathLike):
1228 file = os.fspath(file)
Guido van Rossum3172c5d2007-10-16 18:12:55 +00001229 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001230 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +00001231 self._filePassed = 0
1232 self.filename = file
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001233 modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
1234 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001235 filemode = modeDict[mode]
1236 while True:
1237 try:
1238 self.fp = io.open(file, filemode)
1239 except OSError:
1240 if filemode in modeDict:
1241 filemode = modeDict[filemode]
1242 continue
Thomas Wouterscf297e42007-02-23 15:07:44 +00001243 raise
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001244 break
Fred Drake3d9091e2001-03-26 15:49:24 +00001245 else:
1246 self._filePassed = 1
1247 self.fp = file
1248 self.filename = getattr(file, 'name', None)
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001249 self._fileRefCnt = 1
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001250 self._lock = threading.RLock()
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001251 self._seekable = True
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001252 self._writing = False
Tim Petersa19a1682001-03-29 04:36:09 +00001253
Antoine Pitrou17babc52012-11-17 23:50:08 +01001254 try:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001255 if mode == 'r':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001256 self._RealGetContents()
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001257 elif mode in ('w', 'x'):
Georg Brandl268e4d42010-10-14 06:59:45 +00001258 # set the modified flag so central directory gets written
1259 # even if no files are added to the archive
1260 self._didModify = True
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001261 try:
Serhiy Storchaka34cba332017-01-01 19:00:30 +02001262 self.start_dir = self.fp.tell()
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001263 except (AttributeError, OSError):
1264 self.fp = _Tellable(self.fp)
Serhiy Storchaka34cba332017-01-01 19:00:30 +02001265 self.start_dir = 0
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001266 self._seekable = False
1267 else:
1268 # Some file-like objects can provide tell() but not seek()
1269 try:
1270 self.fp.seek(self.start_dir)
1271 except (AttributeError, OSError):
1272 self._seekable = False
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001273 elif mode == 'a':
Antoine Pitrou17babc52012-11-17 23:50:08 +01001274 try:
1275 # See if file is a zip file
1276 self._RealGetContents()
1277 # seek to start of directory and overwrite
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001278 self.fp.seek(self.start_dir)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001279 except BadZipFile:
1280 # file is not a zip file, just append
1281 self.fp.seek(0, 2)
1282
1283 # set the modified flag so central directory gets written
1284 # even if no files are added to the archive
1285 self._didModify = True
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001286 self.start_dir = self.fp.tell()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001287 else:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001288 raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
Antoine Pitrou17babc52012-11-17 23:50:08 +01001289 except:
1290 fp = self.fp
1291 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001292 self._fpclose(fp)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001293 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001294
    def __enter__(self):
        """Enter a ``with`` block; the ZipFile itself is the managed object."""
        return self
1297
    def __exit__(self, type, value, traceback):
        """Leave a ``with`` block by calling close().

        The exception arguments are ignored and nothing is returned, so an
        exception raised inside the block is not suppressed.
        """
        self.close()
1300
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001301 def __repr__(self):
1302 result = ['<%s.%s' % (self.__class__.__module__,
1303 self.__class__.__qualname__)]
1304 if self.fp is not None:
1305 if self._filePassed:
1306 result.append(' file=%r' % self.fp)
1307 elif self.filename is not None:
1308 result.append(' filename=%r' % self.filename)
1309 result.append(' mode=%r' % self.mode)
1310 else:
1311 result.append(' [closed]')
1312 result.append('>')
1313 return ''.join(result)
1314
Tim Peters7d3bad62001-04-04 18:56:49 +00001315 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +00001316 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001317 fp = self.fp
Georg Brandl268e4d42010-10-14 06:59:45 +00001318 try:
1319 endrec = _EndRecData(fp)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001320 except OSError:
Georg Brandl4d540882010-10-28 06:42:33 +00001321 raise BadZipFile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001322 if not endrec:
Georg Brandl4d540882010-10-28 06:42:33 +00001323 raise BadZipFile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001324 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001325 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001326 size_cd = endrec[_ECD_SIZE] # bytes in central directory
1327 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
R David Murrayf50b38a2012-04-12 18:44:58 -04001328 self._comment = endrec[_ECD_COMMENT] # archive comment
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001329
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001330 # "concat" is zero, unless zip was concatenated to another file
1331 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +00001332 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1333 # If Zip64 extension structures are present, account for them
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001334 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001335
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001336 if self.debug > 2:
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001337 inferred = concat + offset_cd
1338 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001339 # self.start_dir: Position of start of central directory
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001340 self.start_dir = offset_cd + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001341 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001342 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001343 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001344 total = 0
1345 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001346 centdir = fp.read(sizeCentralDir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001347 if len(centdir) != sizeCentralDir:
1348 raise BadZipFile("Truncated central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001349 centdir = struct.unpack(structCentralDir, centdir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001350 if centdir[_CD_SIGNATURE] != stringCentralDir:
1351 raise BadZipFile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001352 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001353 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +00001354 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001355 flags = centdir[5]
1356 if flags & 0x800:
1357 # UTF-8 file names extension
1358 filename = filename.decode('utf-8')
1359 else:
1360 # Historical ZIP filename encoding
1361 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001362 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001363 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +00001364 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1365 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001366 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001367 (x.create_version, x.create_system, x.extract_version, x.reserved,
Christian Tismer59202e52013-10-21 03:59:23 +02001368 x.flag_bits, x.compress_type, t, d,
1369 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
Martin v. Löwisd099b562012-05-01 14:08:22 +02001370 if x.extract_version > MAX_EXTRACT_VERSION:
1371 raise NotImplementedError("zip file version %.1f" %
1372 (x.extract_version / 10))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001373 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1374 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +00001375 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001376 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Christian Tismer59202e52013-10-21 03:59:23 +02001377 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001378
1379 x._decodeExtra()
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001380 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001381 self.filelist.append(x)
1382 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001383
1384 # update total bytes read from central directory
1385 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1386 + centdir[_CD_EXTRA_FIELD_LENGTH]
1387 + centdir[_CD_COMMENT_LENGTH])
1388
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001389 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001390 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001391
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001392
def namelist(self):
    """Return the member names of the archive as a new list.

    Names are taken from the 'filename' attribute of each entry in
    self.filelist, in the order the entries are stored there.
    """
    names = []
    for entry in self.filelist:
        names.append(entry.filename)
    return names
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001396
def infolist(self):
    """Return the list of ZipInfo instances for the archive members.

    The internal list object is handed back as-is (no copy is made).
    """
    members = self.filelist
    return members
1401
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header line is printed, followed by one line per entry in
    self.filelist giving its name, modification time and size.
    'file' is passed straight through to print().
    """
    header = ("File Name", "Modified ", "Size")
    print("%-46s %19s %12s" % header, file=file)
    for member in self.filelist:
        # date_time[:6] is (year, month, day, hour, minute, second)
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % member.date_time[:6]
        row = "%-46s %s %12d" % (member.filename, stamp, member.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001410
1411 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001412 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001413 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001414 for zinfo in self.filelist:
1415 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001416 # Read by chunks, to avoid an OverflowError or a
1417 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001418 with self.open(zinfo.filename, "r") as f:
1419 while f.read(chunk_size): # Check CRC-32
1420 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001421 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001422 return zinfo.filename
1423
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError when self.NameToInfo has no entry for that name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001432
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A non-empty 'pwd' must be bytes, otherwise TypeError is raised.
    Any falsy value (None, b"", ...) clears the default password.
    """
    if pwd:
        if not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        self.pwd = pwd
    else:
        self.pwd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001441
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Only bytes are accepted as an archive comment.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
    # Over-long comments are truncated (with a warning) rather than rejected.
    too_long = len(comment) > ZIP_MAX_COMMENT
    if too_long:
        import warnings
        message = ('Archive comment is too long; truncating to %d bytes'
                   % ZIP_MAX_COMMENT)
        warnings.warn(message, stacklevel=2)
        comment = comment[:ZIP_MAX_COMMENT]
    # Mark the archive as modified so close() writes the ending records.
    self._didModify = True
    self._comment = comment
1459
def read(self, name, pwd=None):
    """Return file bytes for name.

    The member is opened in mode "r" (forwarding 'pwd'), read to the end
    in a single call, and closed again before returning.
    """
    with self.open(name, "r", pwd) as member:
        payload = member.read()
    return payload
Guido van Rossumd8faa362007-04-27 19:54:29 +00001464
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Return file-like object for 'name'.

    name is a string for the file name within the ZIP file, or a ZipInfo
    object.

    mode should be 'r' to read a file already in the ZIP file, or 'w' to
    write to a file newly added to the archive.

    pwd is the password to decrypt files (only used for reading).

    When writing, if the file size is not known in advance but may exceed
    2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
    files.  If the size is known in advance, it is best to pass a ZipInfo
    instance for name, with zinfo.file_size set.

    Raises ValueError for an unsupported mode, for a password given in
    write mode, for a closed archive, or when reading while a write
    handle is open; TypeError for a non-bytes password; BadZipFile when
    the local file header is truncated, has a bad signature, or names a
    different file than the directory entry; NotImplementedError for
    flag bits 5 and 6; RuntimeError when an encrypted member is opened
    without any password.
    """
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    if pwd and (mode == "w"):
        raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    # Make sure we have an info object
    if isinstance(name, ZipInfo):
        # 'name' is already an info object
        zinfo = name
    elif mode == 'w':
        # New member: start from a fresh ZipInfo using the archive defaults
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
    else:
        # Get info object for name
        zinfo = self.getinfo(name)

    if mode == 'w':
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                "is an open writing handle on it. "
                "Close the writing handle before trying to read.")

    # Open for reading:
    # Count this reader as one more user of the underlying file object;
    # self._fpclose is handed to _SharedFile as its close callback.
    self._fileRefCnt += 1
    zef_file = _SharedFile(self.fp, zinfo.header_offset,
                           self._fpclose, self._lock, lambda: self._writing)
    try:
        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, fheader)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            # The local extra field is read only to advance past it
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        # The name in the local header must agree with the directory entry
        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        if is_encrypted:
            # Fall back to the archive-wide default password
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)
        else:
            # A password supplied for an unencrypted member is ignored
            pwd = None

        return ZipExtFile(zef_file, mode, zinfo, pwd, True)
    except:
        # Deliberately bare: whatever went wrong, close the shared file
        # wrapper before re-raising.  NOTE(review): presumably this
        # releases the reference counted above through the _fpclose
        # callback -- confirm against _SharedFile.close().
        zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001562
def _open_to_write(self, zinfo, force_zip64=False):
    """Write the local header for 'zinfo' and return a write handle.

    Resets the size/CRC/flag fields of 'zinfo', records the header
    offset, writes the file header to self.fp and marks the archive as
    having an open writer.  Raises ValueError if force_zip64 is requested
    on an archive opened without ZIP64 support, or if another write
    handle is still open.
    """
    if force_zip64 and not self._allowZip64:
        raise ValueError(
            "force_zip64 is True, but allowZip64 was False when opening "
            "the ZIP file."
        )
    if self._writing:
        raise ValueError("Can't write to the ZIP file while there is "
                         "another write handle open on it. "
                         "Close the first handle before opening another.")

    # Size and CRC are overwritten with correct data after processing the file
    zinfo.CRC = 0
    zinfo.compress_size = 0

    flags = 0x00
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        flags |= 0x02
    if not self._seekable:
        # NOTE(review): bit 3 presumably tells readers that sizes/CRC
        # follow the data because the header cannot be rewritten -- confirm.
        flags |= 0x08
    zinfo.flag_bits = flags

    if not zinfo.external_attr:
        zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

    # Compressed size can be larger than uncompressed size
    use_zip64 = self._allowZip64 and (
        force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

    # The new member starts where the central directory currently begins
    if self._seekable:
        self.fp.seek(self.start_dir)
    zinfo.header_offset = self.fp.tell()

    self._writecheck(zinfo)
    self._didModify = True

    self.fp.write(zinfo.FileHeader(use_zip64))

    self._writing = True
    return _ZipWriteFile(self, zinfo, use_zip64)
1603
def extract(self, member, path=None, pwd=None):
    """Extract a single member and return what _extract_member() returns.

    `member' may be a filename or a ZipInfo object.  The member is placed
    under `path' if given, otherwise under the current working
    directory.  `pwd' is forwarded unchanged to _extract_member().
    """
    target_dir = os.getcwd() if path is None else os.fspath(path)
    return self._extract_member(member, target_dir, pwd)
1616
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling _extract_member() on each.

    `members' defaults to everything returned by namelist(); when given
    it must be a subset of that list.  Files go under `path' if given,
    otherwise under the current working directory.
    """
    selected = self.namelist() if members is None else members
    target_dir = os.getcwd() if path is None else os.fspath(path)

    for item in selected:
        self._extract_member(item, target_dir, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001633
@classmethod
def _sanitize_windows_name(cls, arcname, pathsep):
    """Replace bad characters and remove trailing dots from parts.

    The translation table is built on first use and cached on the class
    in _windows_illegal_name_trans_table.  Path components that end up
    empty are dropped from the result.
    """
    trans = cls._windows_illegal_name_trans_table
    if not trans:
        bad_chars = ':<>|"?*'
        trans = str.maketrans(bad_chars, '_' * len(bad_chars))
        cls._windows_illegal_name_trans_table = trans
    kept = []
    for part in arcname.translate(trans).split(pathsep):
        # remove trailing dots, then skip parts that became empty
        part = part.rstrip('.')
        if part:
            kept.append(part)
    return pathsep.join(kept)
1648
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
    file on the path targetpath.

    'member' may also be a name, in which case it is resolved with
    getinfo().  The archive name is made relative and stripped of drive
    letters, empty, "." and ".." components before being joined to
    'targetpath'.  Missing parent directories are created.  Returns the
    normalized path of the extracted file or directory.

    Directory creation tolerates the directory being created concurrently
    by another thread or process extracting into the same tree.
    """
    if not isinstance(member, ZipInfo):
        member = self.getinfo(member)

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    invalid_path_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                               if x not in invalid_path_parts)
    if os.path.sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, os.path.sep)

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        # exist_ok: the directory may appear between the check above and
        # this call when several extractions run at the same time.
        os.makedirs(upperdirs, exist_ok=True)

    if member.is_dir():
        if not os.path.isdir(targetpath):
            try:
                os.mkdir(targetpath)
            except FileExistsError:
                # Lost a race with another creator; only a real
                # conflict (a non-directory at that path) is an error.
                if not os.path.isdir(targetpath):
                    raise
        return targetpath

    with self.open(member, pwd=pwd) as source, \
         open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1690
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Warns about a duplicate member name, then raises ValueError if the
    archive is not in a writable mode or already closed.  After
    validating the compression type, raises LargeZipFile when ZIP64 is
    disabled but the entry count, file size or header offset needs it.
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ('w', 'x', 'a'):
        raise ValueError("write() requires mode 'w', 'x', or 'a'")
    if not self.fp:
        raise ValueError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)

    # With ZIP64 allowed none of the size limits below apply.
    if self._allowZip64:
        return

    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        reason = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        reason = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        reason = "Zipfile size"
    else:
        return
    raise LargeZipFile(reason +
                       " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001713
def write(self, filename, arcname=None,
          compress_type=None, compresslevel=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    compress_type and compresslevel override the archive defaults for
    this member when given.  A directory is recorded as a header-only
    entry; a regular file is streamed into a member opened for writing.
    Raises ValueError if the archive is closed or a write handle is
    still open.
    """
    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname,
                              strict_timestamps=self._strict_timestamps)

    if not zinfo.is_dir():
        # Regular file: settle compression settings, then copy the data.
        zinfo.compress_type = (self.compression if compress_type is None
                               else compress_type)
        zinfo._compresslevel = (self.compresslevel if compresslevel is None
                                else compresslevel)
        with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
            shutil.copyfileobj(src, dest, 1024*8)
        return

    # Directory: there is no data, so only a local header is written.
    zinfo.compress_size = 0
    zinfo.CRC = 0
    with self._lock:
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        # The central directory will now start after this header
        self.start_dir = self.fp.tell()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001762
def writestr(self, zinfo_or_arcname, data,
             compress_type=None, compresslevel=None):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo is created with the current
    local time and the archive's default compression settings; a name
    ending in '/' gets directory attributes.  compress_type and
    compresslevel, when not None, override the values on the ZipInfo.
    Raises ValueError if the archive is closed or a write handle is
    still open.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
        # endswith() instead of indexing [-1]: an empty name must not
        # blow up with IndexError here.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    if compresslevel is not None:
        zinfo._compresslevel = compresslevel

    zinfo.file_size = len(data)            # Uncompressed size
    with self._lock:
        with self.open(zinfo, mode='w') as dest:
            dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001803
def __del__(self):
    """Call the "close()" method in case the user forgot.

    Any exception raised by close() is not handled here.
    """
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001807
def close(self):
    """Close the file, and for mode 'w', 'x' and 'a' write the ending
    records.

    Does nothing if the archive is already closed.  Raises ValueError
    while a write handle is still open.  The underlying file reference
    is released even if writing the ending records fails.
    """
    if self.fp is None:
        return

    if self._writing:
        raise ValueError("Can't close the ZIP file while there is "
                         "an open writing handle on it. "
                         "Close the writing handle before closing the zip.")

    try:
        # Ending records are only needed for a writable, modified archive.
        needs_end_record = self.mode in ('w', 'x', 'a') and self._didModify
        if needs_end_record:
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        # Detach the file object first so the archive counts as closed.
        handle, self.fp = self.fp, None
        self._fpclose(handle)
1829
def _write_end_record(self):
    """Write the central directory and the end-of-archive records.

    One central directory entry is written to self.fp for every ZipInfo
    in self.filelist, followed (when needed) by the ZIP64 end record and
    its locator, then the regular end-of-archive record and the archive
    comment.  Writing starts at the current position of self.fp.

    Raises LargeZipFile if the directory needs ZIP64 end records but
    self._allowZip64 is false.
    """
    for zinfo in self.filelist:         # write central directory
        # Pack (year, month, day, hour, minute, second) into the two
        # 16-bit date/time fields; seconds are stored halved.
        dt = zinfo.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        # 'extra' collects the values that are too large for the fixed
        # fields and therefore go into a ZIP64 extra field instead.
        extra = []
        if zinfo.file_size > ZIP64_LIMIT \
           or zinfo.compress_size > ZIP64_LIMIT:
            extra.append(zinfo.file_size)
            extra.append(zinfo.compress_size)
            # 0xffffffff in the fixed field marks "see extra field"
            file_size = 0xffffffff
            compress_size = 0xffffffff
        else:
            file_size = zinfo.file_size
            compress_size = zinfo.compress_size

        if zinfo.header_offset > ZIP64_LIMIT:
            extra.append(zinfo.header_offset)
            header_offset = 0xffffffff
        else:
            header_offset = zinfo.header_offset

        extra_data = zinfo.extra
        min_version = 0
        if extra:
            # Append a ZIP64 field to the extra's
            # NOTE(review): _strip_extra presumably drops any existing
            # field with header id 1 so it is not duplicated -- confirm.
            extra_data = _strip_extra(extra_data, (1,))
            # Field layout: id 1, payload length, then one 64-bit value
            # per collected item.
            extra_data = struct.pack(
                '<HH' + 'Q'*len(extra),
                1, 8*len(extra), *extra) + extra_data

            min_version = ZIP64_VERSION

        # Some compression methods raise the minimum version required
        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        filename, flag_bits = zinfo._encodeFilenameFlags()
        centdir = struct.pack(structCentralDir,
                              stringCentralDir, create_version,
                              zinfo.create_system, extract_version, zinfo.reserved,
                              flag_bits, zinfo.compress_type, dostime, dosdate,
                              zinfo.CRC, compress_size, file_size,
                              len(filename), len(extra_data), len(zinfo.comment),
                              0, zinfo.internal_attr, zinfo.external_attr,
                              header_offset)
        # Fixed-size record first, then its three variable-length parts
        self.fp.write(centdir)
        self.fp.write(filename)
        self.fp.write(extra_data)
        self.fp.write(zinfo.comment)

    # pos2: position just past the last central directory entry
    pos2 = self.fp.tell()
    # Write end-of-zip-archive record
    centDirCount = len(self.filelist)
    centDirSize = pos2 - self.start_dir
    centDirOffset = self.start_dir
    # Name of the first quantity that overflows the classic end record
    requires_zip64 = None
    if centDirCount > ZIP_FILECOUNT_LIMIT:
        requires_zip64 = "Files count"
    elif centDirOffset > ZIP64_LIMIT:
        requires_zip64 = "Central directory offset"
    elif centDirSize > ZIP64_LIMIT:
        requires_zip64 = "Central directory size"
    if requires_zip64:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
        zip64endrec = struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, centDirCount, centDirCount,
            centDirSize, centDirOffset)
        self.fp.write(zip64endrec)

        # The locator refers to pos2, which is where the ZIP64 end
        # record above was written.
        zip64locrec = struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, pos2, 1)
        self.fp.write(zip64locrec)
        # Clamp the values so they fit the classic record's fields
        centDirCount = min(centDirCount, 0xFFFF)
        centDirSize = min(centDirSize, 0xFFFFFFFF)
        centDirOffset = min(centDirOffset, 0xFFFFFFFF)

    endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, centDirCount, centDirCount,
                         centDirSize, centDirOffset, len(self._comment))
    self.fp.write(endrec)
    self.fp.write(self._comment)
    self.fp.flush()
1921
def _fpclose(self, fp):
    """Drop one reference to the shared file object.

    'fp' is closed only when the reference count reaches zero and
    self._filePassed is false.
    """
    assert self._fileRefCnt > 0
    self._fileRefCnt -= 1
    if self._fileRefCnt or self._filePassed:
        # Still in use elsewhere, or flagged via _filePassed: leave open.
        return
    fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001927
1928
1929class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001930 """Class to create ZIP archives with Python library files and packages."""
1931
def __init__(self, file, mode="r", compression=ZIP_STORED,
             allowZip64=True, optimize=-1):
    """Open the archive like ZipFile and remember 'optimize'.

    file, mode, compression and allowZip64 are forwarded unchanged to
    ZipFile.__init__.  optimize is stored on the instance as _optimize.
    """
    ZipFile.__init__(self, file, mode=mode, compression=compression,
                     allowZip64=allowZip64)
    # NOTE(review): presumably consulted when modules are compiled for
    # the archive; no reader of this attribute is visible here -- confirm.
    self._optimize = optimize
1937
Christian Tismer59202e52013-10-21 03:59:23 +02001938 def writepy(self, pathname, basename="", filterfunc=None):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001939 """Add all files from "pathname" to the ZIP archive.
1940
Fred Drake484d7352000-10-02 21:14:52 +00001941 If pathname is a package directory, search the directory and
1942 all package subdirectories recursively for all *.py and enter
1943 the modules into the archive. If pathname is a plain
1944 directory, listdir *.py and enter all modules. Else, pathname
1945 must be a Python *.py file and the module will be put into the
Brett Cannonf299abd2015-04-13 14:21:02 -04001946 archive. Added modules are always module.pyc.
Fred Drake484d7352000-10-02 21:14:52 +00001947 This method will compile the module.py into module.pyc if
1948 necessary.
Christian Tismer59202e52013-10-21 03:59:23 +02001949 If filterfunc(pathname) is given, it is called with every argument.
1950 When it is False, the file or directory is skipped.
Fred Drake484d7352000-10-02 21:14:52 +00001951 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001952 pathname = os.fspath(pathname)
Christian Tismer59202e52013-10-21 03:59:23 +02001953 if filterfunc and not filterfunc(pathname):
1954 if self.debug:
Christian Tismer410d9312013-10-22 04:09:28 +02001955 label = 'path' if os.path.isdir(pathname) else 'file'
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001956 print('%s %r skipped by filterfunc' % (label, pathname))
Christian Tismer59202e52013-10-21 03:59:23 +02001957 return
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001958 dir, name = os.path.split(pathname)
1959 if os.path.isdir(pathname):
1960 initname = os.path.join(pathname, "__init__.py")
1961 if os.path.isfile(initname):
1962 # This is a package directory, add it
1963 if basename:
1964 basename = "%s/%s" % (basename, name)
1965 else:
1966 basename = name
1967 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001968 print("Adding package in", pathname, "as", basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001969 fname, arcname = self._get_codename(initname[0:-3], basename)
1970 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001971 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001972 self.write(fname, arcname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01001973 dirlist = sorted(os.listdir(pathname))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001974 dirlist.remove("__init__.py")
1975 # Add all *.py files and package subdirectories
1976 for filename in dirlist:
1977 path = os.path.join(pathname, filename)
1978 root, ext = os.path.splitext(filename)
1979 if os.path.isdir(path):
1980 if os.path.isfile(os.path.join(path, "__init__.py")):
1981 # This is a package directory, add it
Christian Tismer59202e52013-10-21 03:59:23 +02001982 self.writepy(path, basename,
1983 filterfunc=filterfunc) # Recursive call
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001984 elif ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02001985 if filterfunc and not filterfunc(path):
1986 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001987 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02001988 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001989 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02001990 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001991 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001992 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001993 self.write(fname, arcname)
1994 else:
1995 # This is NOT a package directory, add its files at top level
1996 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001997 print("Adding files from directory", pathname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01001998 for filename in sorted(os.listdir(pathname)):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001999 path = os.path.join(pathname, filename)
2000 root, ext = os.path.splitext(filename)
2001 if ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02002002 if filterfunc and not filterfunc(path):
2003 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03002004 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02002005 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002006 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02002007 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002008 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002009 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002010 self.write(fname, arcname)
2011 else:
2012 if pathname[-3:] != ".py":
Collin Winterce36ad82007-08-30 01:19:48 +00002013 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02002014 'Files added with writepy() must end with ".py"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002015 fname, arcname = self._get_codename(pathname[0:-3], basename)
2016 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002017 print("Adding file", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002018 self.write(fname, arcname)
2019
2020 def _get_codename(self, pathname, basename):
2021 """Return (filename, archivename) for the path.
2022
Fred Drake484d7352000-10-02 21:14:52 +00002023 Given a module name path, return the correct file path and
2024 archive name, compiling if necessary. For example, given
2025 /python/lib/string, return (/python/lib/string.pyc, string).
2026 """
Georg Brandl8334fd92010-12-04 10:26:46 +00002027 def _compile(file, optimize=-1):
2028 import py_compile
2029 if self.debug:
2030 print("Compiling", file)
2031 try:
2032 py_compile.compile(file, doraise=True, optimize=optimize)
Serhiy Storchaka45c43752013-01-29 20:10:28 +02002033 except py_compile.PyCompileError as err:
Georg Brandl8334fd92010-12-04 10:26:46 +00002034 print(err.msg)
2035 return False
2036 return True
2037
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002038 file_py = pathname + ".py"
2039 file_pyc = pathname + ".pyc"
Brett Cannonf299abd2015-04-13 14:21:02 -04002040 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2041 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2042 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
Georg Brandl8334fd92010-12-04 10:26:46 +00002043 if self._optimize == -1:
2044 # legacy mode: use whatever file is present
Brett Cannonf299abd2015-04-13 14:21:02 -04002045 if (os.path.isfile(file_pyc) and
Georg Brandl8334fd92010-12-04 10:26:46 +00002046 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2047 # Use .pyc file.
2048 arcname = fname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002049 elif (os.path.isfile(pycache_opt0) and
2050 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
Georg Brandl8334fd92010-12-04 10:26:46 +00002051 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2052 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002053 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002054 arcname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002055 elif (os.path.isfile(pycache_opt1) and
2056 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2057 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002058 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002059 fname = pycache_opt1
2060 arcname = file_pyc
2061 elif (os.path.isfile(pycache_opt2) and
2062 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2063 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2064 # file name in the archive.
2065 fname = pycache_opt2
2066 arcname = file_pyc
Barry Warsaw28a691b2010-04-17 00:19:56 +00002067 else:
Georg Brandl8334fd92010-12-04 10:26:46 +00002068 # Compile py into PEP 3147 pyc file.
2069 if _compile(file_py):
Brett Cannonf299abd2015-04-13 14:21:02 -04002070 if sys.flags.optimize == 0:
2071 fname = pycache_opt0
2072 elif sys.flags.optimize == 1:
2073 fname = pycache_opt1
2074 else:
2075 fname = pycache_opt2
2076 arcname = file_pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002077 else:
2078 fname = arcname = file_py
2079 else:
2080 # new mode: use given optimization level
2081 if self._optimize == 0:
Brett Cannonf299abd2015-04-13 14:21:02 -04002082 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002083 arcname = file_pyc
2084 else:
Brett Cannonf299abd2015-04-13 14:21:02 -04002085 arcname = file_pyc
2086 if self._optimize == 1:
2087 fname = pycache_opt1
2088 elif self._optimize == 2:
2089 fname = pycache_opt2
2090 else:
2091 msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2092 raise ValueError(msg)
Georg Brandl8334fd92010-12-04 10:26:46 +00002093 if not (os.path.isfile(fname) and
2094 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2095 if not _compile(file_py, optimize=self._optimize):
2096 fname = arcname = file_py
Barry Warsaw28a691b2010-04-17 00:19:56 +00002097 archivename = os.path.split(arcname)[1]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002098 if basename:
2099 archivename = "%s/%s" % (basename, archivename)
2100 return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002101
2102
def _unique_everseen(iterable, key=None):
    """List unique elements, preserving order. Remember all elements ever seen.

    Lazily yields each element of *iterable* the first time it (or, when
    *key* is given, ``key(element)``) is encountered.  The elements, or
    the key results, must be hashable because they are kept in a set.

    >>> list(_unique_everseen('AAAABBBCCDAABBB'))
    ['A', 'B', 'C', 'D']
    >>> list(_unique_everseen('ABBCcAD', str.lower))
    ['A', 'B', 'C', 'D']
    """
    seen = set()
    seen_add = seen.add
    if key is None:
        for element in itertools.filterfalse(seen.__contains__, iterable):
            seen_add(element)
            yield element
    else:
        for element in iterable:
            k = key(element)
            if k not in seen:
                seen_add(k)
                yield element
2119
2120
def _parents(path):
    """
    Given a path with elements separated by
    posixpath.sep, generate all parents of that path.

    The result is an iterator over everything _ancestry() produces
    except its first item (the path itself).

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    return itertools.islice(_ancestry(path), 1, None)
2138
2139
def _ancestry(path):
    """
    Given a path with elements separated by
    posixpath.sep, generate all elements of that path

    The path itself (without trailing separators) comes first, followed
    by each successively shorter parent.  Generation stops as soon as
    what is left consists only of separators.

    >>> list(_ancestry('b/d'))
    ['b/d', 'b']
    >>> list(_ancestry('/b/d/'))
    ['/b/d', '/b']
    >>> list(_ancestry('b/d/f/'))
    ['b/d/f', 'b/d', 'b']
    >>> list(_ancestry('b'))
    ['b']
    >>> list(_ancestry(''))
    []
    >>> list(_ancestry('//b'))
    ['//b']
    """
    path = path.rstrip(posixpath.sep)
    # Test the separator-stripped remainder rather than comparing with a
    # single "/": posixpath.split('//b') returns '//' as the head, and
    # splitting '//' gives '//' again, so comparing against "/" alone
    # would never terminate for names with repeated leading slashes.
    while path.rstrip(posixpath.sep):
        yield path
        path, tail = posixpath.split(path)
2160
2161
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """Wrap *root* (a ZipFile, or anything ZipFile() accepts) at member *at*.

        *at* is the member name inside the archive; "" denotes the archive
        root and a trailing "/" denotes a directory.
        """
        self.root = root if isinstance(root, ZipFile) else ZipFile(root)
        self.at = at

    @property
    def open(self):
        """A callable that opens this member via the underlying ZipFile.open()."""
        return functools.partial(self.root.open, self.at)

    @property
    def name(self):
        """The final path component, without any trailing slash."""
        return posixpath.basename(self.at.rstrip("/"))

    def read_text(self, *args, **kwargs):
        """Return the member decoded as text; arguments go to io.TextIOWrapper."""
        with self.open() as strm:
            return io.TextIOWrapper(strm, *args, **kwargs).read()

    def read_bytes(self):
        """Return the raw bytes of the member."""
        with self.open() as strm:
            return strm.read()

    def _is_child(self, path):
        # True when *path* sits directly inside this directory.
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        # Build a sibling Path object sharing the same archive.
        return Path(self.root, at)

    def is_dir(self):
        """Return True for the archive root and for names ending in "/"."""
        return not self.at or self.at.endswith("/")

    def is_file(self):
        """Return True when this path is not a directory."""
        return not self.is_dir()

    def exists(self):
        """Return True when this name is an archive member or an implied directory."""
        return self.at in self._names()

    def iterdir(self):
        """Iterate over the direct children of this directory.

        Raises ValueError when this path is not a directory.
        """
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self._names())
        return filter(self._is_child, subs)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, add):
        """Return the child path *add*, as a directory if only that form exists."""
        candidate = posixpath.join(self.at, add)
        candidate_dir = posixpath.join(self.at, add, "")
        names = self._names()
        # Prefer the plain name; fall back to the "name/" form only when the
        # plain name is absent and the directory form is present.
        if candidate not in names and candidate_dir in names:
            return self._next(candidate_dir)
        return self._next(candidate)

    __truediv__ = joinpath

    @staticmethod
    def _implied_dirs(names):
        """Yield "dir/" for every parent directory that has no explicit entry."""
        # Set membership keeps the filter linear; testing against the list
        # itself made this quadratic in the number of archive members.
        known = set(names)
        return _unique_everseen(
            parent + "/"
            for name in names
            for parent in _parents(name)
            if parent + "/" not in known
        )

    @classmethod
    def _add_implied_dirs(cls, names):
        # Explicit entries first, then the directories they imply.
        return names + list(cls._implied_dirs(names))

    @property
    def parent(self):
        """The containing directory as a Path ("" for top-level entries)."""
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)

    def _names(self):
        # All member names of the archive plus implied directory names.
        return self._add_implied_dirs(self.root.namelist())
2306
2307
def main(args=None):
    """Command-line entry point: list, extract, create or test a zipfile.

    *args* is the argument list handed to argparse; None lets argparse
    fall back to its default (the process command line).  Exactly one
    of -l/--list, -e/--extract, -c/--create or -t/--test is required.
    """
    import argparse

    description = 'A simple command-line interface for zipfile module.'
    parser = argparse.ArgumentParser(description=description)
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    group.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    group.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    args = parser.parse_args(args)

    if args.test is not None:
        src = args.test
        with ZipFile(src, 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args.list is not None:
        src = args.list
        with ZipFile(src, 'r') as zf:
            zf.printdir()

    elif args.extract is not None:
        src, target_dir = args.extract
        with ZipFile(src, 'r') as zf:
            zf.extractall(target_dir)

    elif args.create is not None:
        # First positional value is the archive name, the rest are sources.
        zip_name = args.create.pop(0)
        files = args.create

        def addToZip(zf, path, zippath):
            # Files are deflated; directories get their own entry (unless
            # they map to the archive root) and are then walked in sorted
            # order.  Anything that is neither is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                if zippath:
                    zf.write(path, zippath)
                for nm in sorted(os.listdir(path)):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(zip_name, 'w') as zf:
            for path in files:
                zippath = os.path.basename(path)
                if not zippath:
                    # Path ended with a separator: use the directory's name.
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                addToZip(zf, path, zippath)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002367
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()