blob: 97a5ebf753b58b299e6773450fa9bd81fb56d010 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04006import binascii
7import functools
8import importlib.util
Antoine Pitroua32f9a22010-01-27 21:18:57 +00009import io
shireenraoa4e29912019-08-24 11:26:41 -040010import itertools
Barry Warsaw28a691b2010-04-17 00:19:56 +000011import os
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040012import posixpath
Barry Warsaw28a691b2010-04-17 00:19:56 +000013import shutil
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040014import stat
Barry Warsaw28a691b2010-04-17 00:19:56 +000015import struct
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040016import sys
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020017import threading
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040018import time
Guido van Rossum32abe6f2000-03-31 17:30:02 +000019
20try:
Tim Peterse1190062001-01-15 03:34:38 +000021 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000022 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040023except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000025 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000026
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020027try:
28 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040029except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020030 bz2 = None
31
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020032try:
33 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040034except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020035 lzma = None
36
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020037__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020038 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Georg Brandl4d540882010-10-28 06:42:33 +000039 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000040
class BadZipFile(Exception):
    """Raised when an archive cannot be interpreted as a valid ZIP file.

    Within this module it signals corrupt records (for example a damaged
    extra field) and unsupported layouts such as multi-disk archives.
    """
Thomas Wouters0e3f5912006-08-11 14:57:12 +000043
44
class LargeZipFile(Exception):
    """Raised when writing needs ZIP64 extensions but they are disabled.

    The archive (or one of its members) is too large for the classic ZIP
    fields and the caller did not allow the ZIP64 extensions to be used.
    """
50
Georg Brandl4d540882010-10-28 06:42:33 +000051error = BadZipfile = BadZipFile # Pre-3.2 compatibility names
52
Guido van Rossum32abe6f2000-03-31 17:30:02 +000053
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +000054ZIP64_LIMIT = (1 << 31) - 1
Serhiy Storchakacfbb3942014-09-23 21:34:24 +030055ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +000056ZIP_MAX_COMMENT = (1 << 16) - 1
Thomas Wouters0e3f5912006-08-11 14:57:12 +000057
Guido van Rossum32abe6f2000-03-31 17:30:02 +000058# constants for Zip file compression methods
59ZIP_STORED = 0
60ZIP_DEFLATED = 8
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020061ZIP_BZIP2 = 12
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020062ZIP_LZMA = 14
Guido van Rossum32abe6f2000-03-31 17:30:02 +000063# Other ZIP compression methods not supported
64
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020065DEFAULT_VERSION = 20
66ZIP64_VERSION = 45
67BZIP2_VERSION = 46
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020068LZMA_VERSION = 63
Martin v. Löwisd099b562012-05-01 14:08:22 +020069# we recognize (but not necessarily support) all features up to that version
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020070MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020071
Martin v. Löwisb09b8442008-07-03 14:13:42 +000072# Below are some formats and associated data for reading/writing headers using
73# the struct module. The names and structures of headers/records are those used
74# in the PKWARE description of the ZIP file format:
75# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
76# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000077
Martin v. Löwisb09b8442008-07-03 14:13:42 +000078# The "end of central directory" structure, magic number, size, and indices
79# (section V.I in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +000080structEndArchive = b"<4s4H2LH"
81stringEndArchive = b"PK\005\006"
82sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +000083
84_ECD_SIGNATURE = 0
85_ECD_DISK_NUMBER = 1
86_ECD_DISK_START = 2
87_ECD_ENTRIES_THIS_DISK = 3
88_ECD_ENTRIES_TOTAL = 4
89_ECD_SIZE = 5
90_ECD_OFFSET = 6
91_ECD_COMMENT_SIZE = 7
92# These last two indices are not part of the structure as defined in the
93# spec, but they are used internally by this module as a convenience
94_ECD_COMMENT = 8
95_ECD_LOCATION = 9
96
97# The "central directory" structure, magic number, size, and indices
98# of entries in the structure (section V.F in the format document)
99structCentralDir = "<4s4B4HL2L5H2L"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000100stringCentralDir = b"PK\001\002"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000101sizeCentralDir = struct.calcsize(structCentralDir)
102
Fred Drake3e038e52001-02-28 17:56:26 +0000103# indexes of entries in the central directory structure
104_CD_SIGNATURE = 0
105_CD_CREATE_VERSION = 1
106_CD_CREATE_SYSTEM = 2
107_CD_EXTRACT_VERSION = 3
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000108_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +0000109_CD_FLAG_BITS = 5
110_CD_COMPRESS_TYPE = 6
111_CD_TIME = 7
112_CD_DATE = 8
113_CD_CRC = 9
114_CD_COMPRESSED_SIZE = 10
115_CD_UNCOMPRESSED_SIZE = 11
116_CD_FILENAME_LENGTH = 12
117_CD_EXTRA_FIELD_LENGTH = 13
118_CD_COMMENT_LENGTH = 14
119_CD_DISK_NUMBER_START = 15
120_CD_INTERNAL_FILE_ATTRIBUTES = 16
121_CD_EXTERNAL_FILE_ATTRIBUTES = 17
122_CD_LOCAL_HEADER_OFFSET = 18
123
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000124# The "local file header" structure, magic number, size, and indices
125# (section V.A in the format document)
126structFileHeader = "<4s2B4HL2L2H"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000127stringFileHeader = b"PK\003\004"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000128sizeFileHeader = struct.calcsize(structFileHeader)
129
Fred Drake3e038e52001-02-28 17:56:26 +0000130_FH_SIGNATURE = 0
131_FH_EXTRACT_VERSION = 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000132_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000133_FH_GENERAL_PURPOSE_FLAG_BITS = 3
134_FH_COMPRESSION_METHOD = 4
135_FH_LAST_MOD_TIME = 5
136_FH_LAST_MOD_DATE = 6
137_FH_CRC = 7
138_FH_COMPRESSED_SIZE = 8
139_FH_UNCOMPRESSED_SIZE = 9
140_FH_FILENAME_LENGTH = 10
141_FH_EXTRA_FIELD_LENGTH = 11
142
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000143# The "Zip64 end of central directory locator" structure, magic number, and size
Georg Brandl2ee470f2008-07-16 12:55:28 +0000144structEndArchive64Locator = "<4sLQL"
145stringEndArchive64Locator = b"PK\x06\x07"
146sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000147
148# The "Zip64 end of central directory" record, magic number, size, and indices
149# (section V.G in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000150structEndArchive64 = "<4sQ2H2L4Q"
151stringEndArchive64 = b"PK\x06\x06"
152sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000153
154_CD64_SIGNATURE = 0
155_CD64_DIRECTORY_RECSIZE = 1
156_CD64_CREATE_VERSION = 2
157_CD64_EXTRACT_VERSION = 3
158_CD64_DISK_NUMBER = 4
159_CD64_DISK_NUMBER_START = 5
160_CD64_NUMBER_ENTRIES_THIS_DISK = 6
161_CD64_NUMBER_ENTRIES_TOTAL = 7
162_CD64_DIRECTORY_SIZE = 8
163_CD64_OFFSET_START_CENTDIR = 9
164
Silas Sewell4ba3b502018-09-18 13:00:05 -0400165_DD_SIGNATURE = 0x08074b50
166
# Header that starts every extra-field record: (ID, payload length), both
# little-endian unsigned 16-bit integers.
_EXTRA_FIELD_STRUCT = struct.Struct('<HH')

def _strip_extra(extra, xids):
    """Return *extra* with every record whose ID is in *xids* removed.

    *extra* is the raw bytes of a ZIP "extra" area: a sequence of records,
    each made of a 4-byte (ID, length) header followed by *length* payload
    bytes.  *xids* is any container supporting ``in`` for the IDs to drop.

    When nothing is removed the original object is returned unchanged.
    Otherwise all kept data, including whatever follows the last removed
    record, is returned in its original order.
    """
    unpack = _EXTRA_FIELD_STRUCT.unpack
    modified = False
    buffer = []
    # 'start' marks the beginning of the current run of bytes to keep.
    start = i = 0
    while i + 4 <= len(extra):
        xid, xlen = unpack(extra[i : i + 4])
        j = i + 4 + xlen
        if xid in xids:
            # Flush the kept run that precedes this record, then skip it.
            if i != start:
                buffer.append(extra[start : i])
            start = j
            modified = True
        i = j
    if not modified:
        return extra
    # Keep the records that follow the last removed one; without this the
    # tail of the extra area would be silently discarded.
    if start != len(extra):
        buffer.append(extra[start:])
    return b''.join(buffer)
187
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An OSError raised while probing the file object is treated the same
    as "record not found".
    """
    try:
        found = bool(_EndRecData(fp))  # a record means the magic matched
    except OSError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000195
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    *filename* may be a path, or an already open file / file-like object
    (anything exposing a ``read`` attribute).  Any OSError raised while
    opening or probing yields False instead of propagating.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and close it when done.
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except OSError:
        pass
    return verdict
211
def _EndRecData64(fpin, offset, endrec):
    """Update *endrec* from the ZIP64 end-of-archive records, if present.

    *offset* is the (negative) position of the classic end-of-central-
    directory record relative to the end of the file.  The ZIP64 locator is
    expected directly before it and the ZIP64 record directly before the
    locator.  If either piece is missing or has the wrong signature,
    *endrec* is returned untouched.

    Raises BadZipFile for archives that span multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, _reloff, disks = struct.unpack(
        structEndArchive64Locator, locator)
    if magic != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    (magic, _recsize, _create_version, _read_version, disk_num, disk_dir,
     entries_here, entries_total, dir_size, dir_offset) = struct.unpack(
        structEndArchive64, record)
    if magic != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    for index, value in (
        (_ECD_SIGNATURE, magic),
        (_ECD_DISK_NUMBER, disk_num),
        (_ECD_DISK_START, disk_dir),
        (_ECD_ENTRIES_THIS_DISK, entries_here),
        (_ECD_ENTRIES_TOTAL, entries_total),
        (_ECD_SIZE, dir_size),
        (_ECD_OFFSET, dir_offset),
    ):
        endrec[index] = value
    return endrec
253
254
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list holding the fields of the ZIP "End of central dir"
    record, followed by the archive comment and then the file offset at
    which the record starts.  None is returned when no usable record is
    found.
    """
    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: an archive without a comment ends with the record itself.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    no_comment_record = (
        len(tail) == sizeEndCentDir
        and tail[0:4] == stringEndArchive
        and tail[-2:] == b"\000\000"
    )
    if no_comment_record:
        # Signature matches and the comment length is zero: unpack, then
        # append a blank comment and the record's start offset.
        endrec = list(struct.unpack(structEndArchive, tail))
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the record signature.  The
    # comment is the last item in the file and may be up to 64K long; it is
    # assumed that the signature does not appear inside the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start < 0:
        # Unable to find a valid end of central directory structure
        return None

    # Found the magic number; attempt to unpack and interpret
    record_end = start + sizeEndCentDir
    recData = data[start:record_end]
    if len(recData) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, recData))
    commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    endrec.append(data[record_end:record_end + commentSize])
    endrec.append(maxCommentStart + start)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, maxCommentStart + start - filesize,
                         endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000314
Fred Drake484d7352000-10-02 21:14:52 +0000315
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename is the member name; it is cut at the first null character
        and the platform path separator is replaced by "/".
        date_time is a (year, month, day, hour, min, sec) sequence.

        Raises ValueError when the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        self.compress_size = 0          # Size of the compressed file
        self.file_size = 0              # Size of the uncompressed file
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file

    def __repr__(self):
        """Return a compact description listing only the informative fields."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The high 16 bits hold the file mode, the low 16 bits other flags.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        zip64 forces (True) or forbids (False) the ZIP64 extra record; with
        None it is used only when a size exceeds ZIP64_LIMIT.

        As a side effect, extract_version and create_version are raised to
        the minimum version required by the chosen features.

        Raises LargeZipFile when a size needs ZIP64 but zip64 is false.
        """
        dt = self.date_time
        # Pack the timestamp into the MS-DOS date and time words.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        ASCII names are encoded as ASCII with the flags unchanged; any other
        name is encoded as UTF-8 and flag bit 0x800 is added.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 record (ID 0x0001) of the extra area, if any.

        The 64-bit values replace file_size, compress_size and
        header_offset, in that order, but only for the attributes currently
        holding the "see ZIP64 record" sentinel.

        Raises BadZipFile when a record is truncated, has an unexpected
        length, or lacks a value that a sentinel says must be present.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                # ZIP64 extension (large files and/or large archives).
                # 'field' names the value being fetched so that a record
                # which is too short is reported as a corrupt archive
                # rather than leaking an IndexError.
                try:
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        field = "File size"
                        self.file_size = counts[idx]
                        idx += 1

                    if self.compress_size == 0xFFFFFFFF:
                        field = "Compress size"
                        self.compress_size = counts[idx]
                        idx += 1

                    if self.header_offset == 0xffffffff:
                        field = "Header offset"
                        self.header_offset = counts[idx]
                        idx += 1
                except IndexError:
                    raise BadZipFile("Corrupt zip64 extra field. "
                                     "%s not found." % field) from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).

        When strict_timestamps is false, a modification time before 1980 is
        replaced by 1980-01-01 00:00:00 and one after 2107 by
        2107-12-31 23:59:59.
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory.

        A member is a directory when its name ends with "/"; an empty name
        is therefore not a directory.
        """
        # endswith() copes with an empty name, where indexing [-1] would
        # raise IndexError.
        return self.filename.endswith('/')
538
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000539
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300540# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
541# internal keys. We noticed that a direct implementation is faster than
542# relying on binascii.crc32().
Thomas Wouterscf297e42007-02-23 15:07:44 +0000543
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300544_crctable = None
def _gen_crc(crc):
    """Return *crc* after eight shift/XOR rounds with polynomial 0xEDB88320.

    Applied to each value in range(256) this yields one entry of the CRC-32
    lookup table.
    """
    for _ in range(8):
        carry = crc & 1
        crc >>= 1
        if carry:
            crc ^= 0xEDB88320
    return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000552
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300553# ZIP supports a password-based form of encryption. Even though known
554# plaintext attacks have been found against it, it is still useful
555# to be able to get data out of such a file.
556#
557# Usage:
558# zd = _ZipDecrypter(mypwd)
559# plain_bytes = zd(cypher_bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000560
def _ZipDecrypter(pwd):
    """Return a function that decrypts password-protected member data.

    *pwd* is iterated byte by byte (integers) to seed three internal keys.
    The returned callable takes a bytes-like object and returns the
    decrypted bytes.  It keeps updating the keys, so successive chunks of
    one stream must be passed in order to the same callable.
    """
    global _crctable
    if _crctable is None:
        # Build the shared CRC lookup table on first use.
        _crctable = [_gen_crc(value) for value in range(256)]
    table = _crctable

    key0, key1, key2 = 305419896, 591751049, 878082192

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ table[(crc ^ ch) & 0xFF]

    def update_keys(c):
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        key1 = ((key1 + (key0 & 0xFF)) * 134775813 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    # Mix the password into the initial key state.
    for pwd_byte in pwd:
        update_keys(pwd_byte)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        for c in data:
            k = key2 | 2
            c ^= ((k * (k ^ 1)) >> 8) & 0xFF
            # The keys advance using the decrypted byte.
            update_keys(c)
            plain.append(c)
        return bytes(plain)

    return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000597
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200598
class LZMACompressor:
    """Compressor producing a raw LZMA1 stream preceded by a small header.

    The header is emitted once, in front of the first output: the two bytes
    9 and 4, the little-endian 16-bit length of the filter properties, and
    the properties themselves.
    """

    def __init__(self):
        # The real compressor is created lazily, on first output.
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        header = struct.pack('<BBH', 9, 4, len(props))
        return header + props

    def compress(self, data):
        """Return compressed output for *data*, header first if needed."""
        prefix = self._init() if self._comp is None else b''
        return prefix + self._comp.compress(data)

    def flush(self):
        """Finish the stream and return the remaining compressed output."""
        prefix = self._init() if self._comp is None else b''
        return prefix + self._comp.flush()
620
621
class LZMADecompressor:
    """Decompressor for a raw LZMA1 stream preceded by a properties header.

    Input is buffered until the 4-byte header, the filter properties it
    announces, and at least one byte of compressed data have arrived; only
    then is the underlying decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Return decompressed bytes for *data* (b'' while still buffering)."""
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            # Bytes 2-3 of the header give the size of the properties.
            psize, = struct.unpack('<H', pending[2:4])
            header_end = 4 + psize
            if len(pending) <= header_end:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:header_end])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[header_end:]
            # The buffer is no longer needed once the header is parsed.
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
648
649
650compressor_names = {
651 0: 'store',
652 1: 'shrink',
653 2: 'reduce',
654 3: 'reduce',
655 4: 'reduce',
656 5: 'reduce',
657 6: 'implode',
658 7: 'tokenize',
659 8: 'deflate',
660 9: 'deflate64',
661 10: 'implode',
662 12: 'bzip2',
663 14: 'lzma',
664 18: 'terse',
665 19: 'lz77',
666 97: 'wavpack',
667 98: 'ppmd',
668}
669
def _check_compression(compression):
    """Validate that *compression* is a usable compression method.

    Returns None on success.  Raises RuntimeError when the method is known
    but its supporting module could not be imported, and
    NotImplementedError for any other method.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        return
    if compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
        return
    if compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
        return
    raise NotImplementedError("That compression method is not supported")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200687
688
def _get_compressor(compress_type, compresslevel=None):
    """Return a new compressor object for *compress_type*.

    *compresslevel* is forwarded to the zlib and bz2 compressors when it is
    not None and is ignored for ZIP_LZMA.  Returns None for any other
    compression type.
    """
    if compress_type == ZIP_DEFLATED:
        level = (zlib.Z_DEFAULT_COMPRESSION if compresslevel is None
                 else compresslevel)
        # Negative wbits: raw deflate stream without a zlib header/trailer.
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()
    return None
703
704
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    Returns None for ZIP_STORED.  Raises NotImplementedError naming the
    method code (and its display name, when known) for methods this module
    does not implement, and RuntimeError (via _check_compression) when the
    method is implemented but its module is missing.
    """
    # Report unknown methods here, before _check_compression(): that helper
    # raises a generic NotImplementedError for them, which previously made
    # the descriptive message below unreachable.
    if compress_type not in (ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2, ZIP_LZMA):
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
        raise NotImplementedError("compression type %d" % (compress_type,))

    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # Negative wbits: raw deflate stream without a zlib header/trailer.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    return LZMADecompressor()
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200721
722
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200723class _SharedFile:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300724 def __init__(self, file, pos, close, lock, writing):
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200725 self._file = file
726 self._pos = pos
727 self._close = close
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200728 self._lock = lock
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300729 self._writing = writing
John Jolly066df4f2018-01-30 01:51:35 -0700730 self.seekable = file.seekable
731 self.tell = file.tell
732
733 def seek(self, offset, whence=0):
734 with self._lock:
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200735 if self._writing():
John Jolly066df4f2018-01-30 01:51:35 -0700736 raise ValueError("Can't reposition in the ZIP file while "
737 "there is an open writing handle on it. "
738 "Close the writing handle before trying to read.")
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200739 self._file.seek(offset, whence)
John Jolly066df4f2018-01-30 01:51:35 -0700740 self._pos = self._file.tell()
741 return self._pos
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200742
743 def read(self, n=-1):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200744 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300745 if self._writing():
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300746 raise ValueError("Can't read from the ZIP file while there "
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300747 "is an open writing handle on it. "
748 "Close the writing handle before trying to read.")
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200749 self._file.seek(self._pos)
750 data = self._file.read(n)
751 self._pos = self._file.tell()
752 return data
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200753
754 def close(self):
755 if self._file is not None:
756 fileobj = self._file
757 self._file = None
758 self._close(fileobj)
759
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200760# Provide the tell method for unseekable stream
761class _Tellable:
762 def __init__(self, fp):
763 self.fp = fp
764 self.offset = 0
765
766 def write(self, data):
767 n = self.fp.write(data)
768 self.offset += n
769 return n
770
771 def tell(self):
772 return self.offset
773
774 def flush(self):
775 self.fp.flush()
776
777 def close(self):
778 self.fp.close()
779
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200780
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # Parenthesised on purpose: "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Wrap *fileobj*, positioned at the member's data, for reading.

        fileobj:       file object the (compressed) member data is read from.
        mode:          stored as the ``mode`` attribute.
        zipinfo:       supplies compress_type, compress_size, file_size,
                       filename and (optionally) CRC for the member.
        decrypter:     optional callable applied to every chunk read.
        close_fileobj: if true, close() also closes *fileobj*.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        self._seekable = False
        try:
            if fileobj.seekable():
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                # Raises AttributeError when zipinfo had no CRC (no
                # _running_crc was set above); the handler below then
                # leaves the object marked as not seekable.
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Put the bytes just read back in front of the buffer.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than requested: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n=-1):
        """Read up to n bytes with at most one read() system call.

        If n is omitted, None, or negative, buffered data plus the first
        non-empty chunk obtained from the stream is returned.
        """

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    # Got more than requested: keep the surplus buffered.
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the member's declared size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read (and decrypt) between MIN_READ_SIZE and n raw bytes, capped
        # at what is left of the member's compressed data.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        return self._seekable

    def seek(self, offset, whence=0):
        """Move to a new position in the uncompressed data and return it.

        The target is clamped to the range [0, file size].  Raises
        io.UnsupportedOperation if the underlying stream is not seekable
        and ValueError for an unknown *whence*.
        """
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            # NOTE(review): the decrypter is not reset here and
            # _compress_left is restored without the 12-byte adjustment
            # made in __init__, so rewinding an encrypted member presumably
            # misbehaves - confirm.
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos

        # Move forward by reading and discarding data in bounded chunks.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        """Return the current position in the uncompressed data."""
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        # Bytes handed out so far = bytes decompressed minus the part of
        # the read buffer that has not been consumed yet.
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos
1077
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001078
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001079class _ZipWriteFile(io.BufferedIOBase):
1080 def __init__(self, zf, zinfo, zip64):
1081 self._zinfo = zinfo
1082 self._zip64 = zip64
1083 self._zipfile = zf
Bo Baylesce237c72018-01-29 23:54:07 -06001084 self._compressor = _get_compressor(zinfo.compress_type,
1085 zinfo._compresslevel)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001086 self._file_size = 0
1087 self._compress_size = 0
1088 self._crc = 0
1089
1090 @property
1091 def _fileobj(self):
1092 return self._zipfile.fp
1093
1094 def writable(self):
1095 return True
1096
1097 def write(self, data):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001098 if self.closed:
1099 raise ValueError('I/O operation on closed file.')
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001100 nbytes = len(data)
1101 self._file_size += nbytes
1102 self._crc = crc32(data, self._crc)
1103 if self._compressor:
1104 data = self._compressor.compress(data)
1105 self._compress_size += len(data)
1106 self._fileobj.write(data)
1107 return nbytes
1108
1109 def close(self):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001110 if self.closed:
1111 return
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001112 try:
1113 super().close()
1114 # Flush any data from the compressor, and update header info
1115 if self._compressor:
1116 buf = self._compressor.flush()
1117 self._compress_size += len(buf)
1118 self._fileobj.write(buf)
1119 self._zinfo.compress_size = self._compress_size
1120 else:
1121 self._zinfo.compress_size = self._file_size
1122 self._zinfo.CRC = self._crc
1123 self._zinfo.file_size = self._file_size
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001124
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001125 # Write updated header info
1126 if self._zinfo.flag_bits & 0x08:
1127 # Write CRC and file sizes after the file data
1128 fmt = '<LLQQ' if self._zip64 else '<LLLL'
1129 self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
1130 self._zinfo.compress_size, self._zinfo.file_size))
1131 self._zipfile.start_dir = self._fileobj.tell()
1132 else:
1133 if not self._zip64:
1134 if self._file_size > ZIP64_LIMIT:
1135 raise RuntimeError(
1136 'File size unexpectedly exceeded ZIP64 limit')
1137 if self._compress_size > ZIP64_LIMIT:
1138 raise RuntimeError(
1139 'Compressed size unexpectedly exceeded ZIP64 limit')
1140 # Seek backwards and write file header (which will now include
1141 # correct CRC and file sizes)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001142
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001143 # Preserve current position in file
1144 self._zipfile.start_dir = self._fileobj.tell()
1145 self._fileobj.seek(self._zinfo.header_offset)
1146 self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1147 self._fileobj.seek(self._zipfile.start_dir)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001148
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001149 # Successfully written: Add file to our caches
1150 self._zipfile.filelist.append(self._zinfo)
1151 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1152 finally:
1153 self._zipfile._writing = False
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001154
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001155
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001156
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001157class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001158 """ Class with methods to open, read, write, close, list zip files.
1159
Bo Baylesce237c72018-01-29 23:54:07 -06001160 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1161 compresslevel=None)
Tim Petersa19a1682001-03-29 04:36:09 +00001162
Fred Drake3d9091e2001-03-26 15:49:24 +00001163 file: Either the path to the file, or a file-like object.
1164 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001165 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1166 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001167 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1168 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001169 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1170 needed, otherwise it will raise an exception when this would
1171 be necessary.
Bo Baylesce237c72018-01-29 23:54:07 -06001172 compresslevel: None (default for the given compression type) or an integer
1173 specifying the level to pass to the compressor.
1174 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1175 When using ZIP_DEFLATED integers 0 through 9 are accepted.
1176 When using ZIP_BZIP2 integers 1 through 9 are accepted.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001177
Fred Drake3d9091e2001-03-26 15:49:24 +00001178 """
Fred Drake484d7352000-10-02 21:14:52 +00001179
Fred Drake90eac282001-02-28 05:29:34 +00001180 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001181 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001182
Bo Baylesce237c72018-01-29 23:54:07 -06001183 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
Marcel Plch77b112c2018-08-31 16:43:31 +02001184 compresslevel=None, *, strict_timestamps=True):
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001185 """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
1186 or append 'a'."""
1187 if mode not in ('r', 'w', 'x', 'a'):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001188 raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001189
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001190 _check_compression(compression)
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001191
1192 self._allowZip64 = allowZip64
1193 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +00001194 self.debug = 0 # Level of printing: 0 through 3
1195 self.NameToInfo = {} # Find file info given name
1196 self.filelist = [] # List of ZipInfo instances for archive
1197 self.compression = compression # Method of compression
Bo Baylesce237c72018-01-29 23:54:07 -06001198 self.compresslevel = compresslevel
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001199 self.mode = mode
Thomas Wouterscf297e42007-02-23 15:07:44 +00001200 self.pwd = None
R David Murrayf50b38a2012-04-12 18:44:58 -04001201 self._comment = b''
Marcel Plch77b112c2018-08-31 16:43:31 +02001202 self._strict_timestamps = strict_timestamps
Tim Petersa19a1682001-03-29 04:36:09 +00001203
Fred Drake3d9091e2001-03-26 15:49:24 +00001204 # Check if we were passed a file-like object
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001205 if isinstance(file, os.PathLike):
1206 file = os.fspath(file)
Guido van Rossum3172c5d2007-10-16 18:12:55 +00001207 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001208 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +00001209 self._filePassed = 0
1210 self.filename = file
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001211 modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
1212 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001213 filemode = modeDict[mode]
1214 while True:
1215 try:
1216 self.fp = io.open(file, filemode)
1217 except OSError:
1218 if filemode in modeDict:
1219 filemode = modeDict[filemode]
1220 continue
Thomas Wouterscf297e42007-02-23 15:07:44 +00001221 raise
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001222 break
Fred Drake3d9091e2001-03-26 15:49:24 +00001223 else:
1224 self._filePassed = 1
1225 self.fp = file
1226 self.filename = getattr(file, 'name', None)
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001227 self._fileRefCnt = 1
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001228 self._lock = threading.RLock()
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001229 self._seekable = True
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001230 self._writing = False
Tim Petersa19a1682001-03-29 04:36:09 +00001231
Antoine Pitrou17babc52012-11-17 23:50:08 +01001232 try:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001233 if mode == 'r':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001234 self._RealGetContents()
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001235 elif mode in ('w', 'x'):
Georg Brandl268e4d42010-10-14 06:59:45 +00001236 # set the modified flag so central directory gets written
1237 # even if no files are added to the archive
1238 self._didModify = True
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001239 try:
Serhiy Storchaka34cba332017-01-01 19:00:30 +02001240 self.start_dir = self.fp.tell()
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001241 except (AttributeError, OSError):
1242 self.fp = _Tellable(self.fp)
Serhiy Storchaka34cba332017-01-01 19:00:30 +02001243 self.start_dir = 0
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001244 self._seekable = False
1245 else:
1246 # Some file-like objects can provide tell() but not seek()
1247 try:
1248 self.fp.seek(self.start_dir)
1249 except (AttributeError, OSError):
1250 self._seekable = False
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001251 elif mode == 'a':
Antoine Pitrou17babc52012-11-17 23:50:08 +01001252 try:
1253 # See if file is a zip file
1254 self._RealGetContents()
1255 # seek to start of directory and overwrite
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001256 self.fp.seek(self.start_dir)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001257 except BadZipFile:
1258 # file is not a zip file, just append
1259 self.fp.seek(0, 2)
1260
1261 # set the modified flag so central directory gets written
1262 # even if no files are added to the archive
1263 self._didModify = True
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001264 self.start_dir = self.fp.tell()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001265 else:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001266 raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
Antoine Pitrou17babc52012-11-17 23:50:08 +01001267 except:
1268 fp = self.fp
1269 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001270 self._fpclose(fp)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001271 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001272
Ezio Melottifaa6b7f2009-12-30 12:34:59 +00001273 def __enter__(self):
1274 return self
1275
1276 def __exit__(self, type, value, traceback):
1277 self.close()
1278
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001279 def __repr__(self):
1280 result = ['<%s.%s' % (self.__class__.__module__,
1281 self.__class__.__qualname__)]
1282 if self.fp is not None:
1283 if self._filePassed:
1284 result.append(' file=%r' % self.fp)
1285 elif self.filename is not None:
1286 result.append(' filename=%r' % self.filename)
1287 result.append(' mode=%r' % self.mode)
1288 else:
1289 result.append(' [closed]')
1290 result.append('>')
1291 return ''.join(result)
1292
Tim Peters7d3bad62001-04-04 18:56:49 +00001293 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +00001294 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001295 fp = self.fp
Georg Brandl268e4d42010-10-14 06:59:45 +00001296 try:
1297 endrec = _EndRecData(fp)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001298 except OSError:
Georg Brandl4d540882010-10-28 06:42:33 +00001299 raise BadZipFile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001300 if not endrec:
Georg Brandl4d540882010-10-28 06:42:33 +00001301 raise BadZipFile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001302 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001303 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001304 size_cd = endrec[_ECD_SIZE] # bytes in central directory
1305 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
R David Murrayf50b38a2012-04-12 18:44:58 -04001306 self._comment = endrec[_ECD_COMMENT] # archive comment
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001307
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001308 # "concat" is zero, unless zip was concatenated to another file
1309 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +00001310 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1311 # If Zip64 extension structures are present, account for them
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001312 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001313
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001314 if self.debug > 2:
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001315 inferred = concat + offset_cd
1316 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001317 # self.start_dir: Position of start of central directory
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001318 self.start_dir = offset_cd + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001319 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001320 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001321 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001322 total = 0
1323 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001324 centdir = fp.read(sizeCentralDir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001325 if len(centdir) != sizeCentralDir:
1326 raise BadZipFile("Truncated central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001327 centdir = struct.unpack(structCentralDir, centdir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001328 if centdir[_CD_SIGNATURE] != stringCentralDir:
1329 raise BadZipFile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001330 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001331 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +00001332 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001333 flags = centdir[5]
1334 if flags & 0x800:
1335 # UTF-8 file names extension
1336 filename = filename.decode('utf-8')
1337 else:
1338 # Historical ZIP filename encoding
1339 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001340 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001341 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +00001342 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1343 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001344 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001345 (x.create_version, x.create_system, x.extract_version, x.reserved,
Christian Tismer59202e52013-10-21 03:59:23 +02001346 x.flag_bits, x.compress_type, t, d,
1347 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
Martin v. Löwisd099b562012-05-01 14:08:22 +02001348 if x.extract_version > MAX_EXTRACT_VERSION:
1349 raise NotImplementedError("zip file version %.1f" %
1350 (x.extract_version / 10))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001351 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1352 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +00001353 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001354 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Christian Tismer59202e52013-10-21 03:59:23 +02001355 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001356
1357 x._decodeExtra()
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001358 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001359 self.filelist.append(x)
1360 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001361
1362 # update total bytes read from central directory
1363 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1364 + centdir[_CD_EXTRA_FIELD_LENGTH]
1365 + centdir[_CD_COMMENT_LENGTH])
1366
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001367 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001368 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001369
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001370
def namelist(self):
    """Return the archive member names as a new list.

    Names appear in the same order as the entries of ``self.filelist``.
    """
    names = []
    for entry in self.filelist:
        names.append(entry.filename)
    return names
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001374
def infolist(self):
    """Return the list of ZipInfo instances for files in the archive.

    The returned object is ``self.filelist`` itself, not a copy.
    """
    entries = self.filelist
    return entries
1379
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    A header row is printed first, then one row per entry of
    ``self.filelist`` showing its name, modification time and size.
    *file* is forwarded to ``print`` unchanged.
    """
    header = ("File Name", "Modified ", "Size")
    print("%-46s %19s %12s" % header, file=file)
    for entry in self.filelist:
        # date_time may carry more than six fields; only the first six
        # (year, month, day, hour, minute, second) are formatted.
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = (entry.filename, stamp, entry.file_size)
        print("%-46s %s %12d" % row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001388
def testzip(self):
    """Read every member in full and report the first corrupt one.

    Returns the name of the first member whose read raises BadZipFile,
    or None when all members read cleanly.
    """
    block = 2 ** 20
    for entry in self.filelist:
        try:
            # Pull the data through in fixed-size pieces so that very
            # large members cannot trigger an OverflowError or a
            # MemoryError; reading to the end is what checks the CRC-32.
            with self.open(entry.filename, "r") as member:
                data = member.read(block)
                while data:
                    data = member.read(block)
        except BadZipFile:
            return entry.filename
1401
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError when the archive has no member of that name.
    """
    entry = self.NameToInfo.get(name)
    if entry is not None:
        return entry

    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001410
def setpassword(self, pwd):
    """Set the default password used for encrypted files.

    Any falsy value clears the stored password (``self.pwd`` becomes
    None). A truthy value must be a bytes instance, otherwise TypeError
    is raised.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001419
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Only bytes are accepted for the archive comment.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
    # Over-long comments are truncated (with a warning), not rejected.
    too_long = len(comment) > ZIP_MAX_COMMENT
    if too_long:
        import warnings
        message = ('Archive comment is too long; truncating to %d bytes'
                   % ZIP_MAX_COMMENT)
        warnings.warn(message, stacklevel=2)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    # Flag the archive as modified so close() rewrites the end records.
    self._didModify = True
1437
def read(self, name, pwd=None):
    """Return the complete contents of member 'name' as bytes.

    'name' and 'pwd' are passed straight to ``self.open`` in read mode.
    """
    with self.open(name, "r", pwd) as member:
        payload = member.read()
    return payload
Guido van Rossumd8faa362007-04-27 19:54:29 +00001442
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Return file-like object for 'name'.

    name is a string for the file name within the ZIP file, or a ZipInfo
    object.

    mode should be 'r' to read a file already in the ZIP file, or 'w' to
    write to a file newly added to the archive.

    pwd is the password to decrypt files (only used for reading).

    When writing, if the file size is not known in advance but may exceed
    2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
    files. If the size is known in advance, it is best to pass a ZipInfo
    instance for name, with zinfo.file_size set.

    Raises:
        ValueError: mode is not 'r' or 'w', pwd is given together with
            mode 'w', the archive is already closed, or a read is
            requested while a writing handle is open.
        TypeError: pwd is truthy but not bytes.
        KeyError: (mode 'r', name given as a string) no such member;
            raised by getinfo().
        BadZipFile: the local file header is truncated, has a bad
            signature, or its file name differs from the directory's.
        NotImplementedError: the member uses flag bit 5 or flag bit 6.
        RuntimeError: the member is encrypted and no password is
            available, or the password check byte does not match.
    """
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    if pwd and (mode == "w"):
        raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    # Make sure we have an info object
    if isinstance(name, ZipInfo):
        # 'name' is already an info object
        zinfo = name
    elif mode == 'w':
        # New member: inherit the archive-wide compression settings.
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
    else:
        # Get info object for name
        zinfo = self.getinfo(name)

    if mode == 'w':
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                         "is an open writing handle on it. "
                         "Close the writing handle before trying to read.")

    # Open for reading:
    # Count this reader as a user of the underlying file; _fpclose()
    # decrements the counter again.
    self._fileRefCnt += 1
    zef_file = _SharedFile(self.fp, zinfo.header_offset,
                           self._fpclose, self._lock, lambda: self._writing)
    try:
        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, fheader)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            # The extra field is read only to move past it.
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            # Historical ZIP filename encoding
            fname_str = fname.decode("cp437")

        # The name stored in the local header must agree with the one
        # recorded for this entry.
        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            # An explicit pwd argument wins; otherwise fall back to the
            # archive-wide default set via setpassword().
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            # used to strengthen the algorithm. The first 11 bytes are
            # completely random, while the 12th contains the MSB of the CRC,
            # or the MSB of the file time depending on the header type
            # and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = zd(header[0:12])
            if zinfo.flag_bits & 0x8:
                # compare against the file time from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file %r" % name)

        return ZipExtFile(zef_file, mode, zinfo, zd, True)
    except:
        # Deliberately bare: on any failure, close the shared handle
        # before re-raising the original exception unchanged.
        # NOTE(review): closing zef_file presumably invokes the _fpclose
        # callback passed above, undoing the _fileRefCnt increment --
        # confirm against _SharedFile.
        zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001556
def _open_to_write(self, zinfo, force_zip64=False):
    """Write the local header for 'zinfo' and return a writing handle.

    Raises ValueError when force_zip64 is requested on an archive opened
    with allowZip64 False, or when another writing handle is still open.
    On success ``self._writing`` is set and a _ZipWriteFile is returned.
    """
    if force_zip64 and not self._allowZip64:
        raise ValueError(
            "force_zip64 is True, but allowZip64 was False when opening "
            "the ZIP file."
        )
    if self._writing:
        raise ValueError("Can't write to the ZIP file while there is "
                         "another write handle open on it. "
                         "Close the first handle before opening another.")

    # Placeholders: sizes and CRC are overwritten with correct data after
    # processing the file.
    zinfo.compress_size = 0
    zinfo.CRC = 0

    flags = 0x00
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        flags |= 0x02
    if not self._seekable:
        flags |= 0x08
    zinfo.flag_bits = flags

    if not zinfo.external_attr:
        zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

    # Compressed size can be larger than uncompressed size, hence the
    # 5% margin on the declared file size.
    use_zip64 = self._allowZip64 and \
        (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

    # The new member starts where the central directory currently begins.
    if self._seekable:
        self.fp.seek(self.start_dir)
    zinfo.header_offset = self.fp.tell()

    self._writecheck(zinfo)
    self._didModify = True

    self.fp.write(zinfo.FileHeader(use_zip64))

    self._writing = True
    return _ZipWriteFile(self, zinfo, use_zip64)
1597
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the value of ``_extract_member``.

    `member' may be a filename or a ZipInfo object. The member is
    extracted under `path', or under the current working directory when
    `path' is None. `pwd' is passed on for decrypting the member.
    """
    destination = os.getcwd() if path is None else os.fspath(path)
    return self._extract_member(member, destination, pwd)
1610
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members from the archive.

    `members' defaults to everything returned by namelist(). Members are
    extracted under `path', or under the current working directory when
    `path' is None. `pwd' is passed on for each member.
    """
    targets = self.namelist() if members is None else members
    destination = os.getcwd() if path is None else os.fspath(path)

    for item in targets:
        self._extract_member(item, destination, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001627
@classmethod
def _sanitize_windows_name(cls, arcname, pathsep):
    """Replace bad characters and remove trailing dots from parts.

    Each character of ``:<>|"?*`` becomes an underscore, trailing dots
    are stripped from every `pathsep'-separated component, and
    components that end up empty are dropped.
    """
    trans = cls._windows_illegal_name_trans_table
    if not trans:
        # Build the translation table once and cache it on the class.
        bad_chars = ':<>|"?*'
        trans = str.maketrans(bad_chars, '_' * len(bad_chars))
        cls._windows_illegal_name_trans_table = trans

    cleaned = []
    for part in arcname.translate(trans).split(pathsep):
        part = part.rstrip('.')
        if part:
            cleaned.append(part)
    return pathsep.join(cleaned)
1642
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
    file on the path targetpath.

    'member' may also be a member name, which is resolved with
    getinfo(). The archive name is turned into a relative path (drive,
    empty, "." and ".." components are removed) before being joined to
    'targetpath'. Missing parent directories are created. Returns the
    normalized path of the extracted file or directory.

    Directory creation tolerates the directory being created
    concurrently by another thread or process.
    """
    if not isinstance(member, ZipInfo):
        member = self.getinfo(member)

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    invalid_path_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                               if x not in invalid_path_parts)
    if os.path.sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, os.path.sep)

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        # exist_ok: another extractor may create the directory between
        # the exists() check above and this call.
        os.makedirs(upperdirs, exist_ok=True)

    if member.is_dir():
        if not os.path.isdir(targetpath):
            try:
                os.mkdir(targetpath)
            except FileExistsError:
                # Lost a race with a concurrent creator: fine as long as
                # what now exists really is a directory.
                if not os.path.isdir(targetpath):
                    raise
        return targetpath

    with self.open(member, pwd=pwd) as source, \
         open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1684
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Warns about a duplicate member name; raises ValueError when the
    archive mode does not permit writing or the archive is closed; and,
    when ZIP64 is not allowed, raises LargeZipFile if the member would
    need ZIP64 extensions.
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ('w', 'x', 'a'):
        raise ValueError("write() requires mode 'w', 'x', or 'a'")
    if not self.fp:
        raise ValueError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)

    if self._allowZip64:
        return

    # Without ZIP64, find the first limit (if any) this member exceeds.
    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        reason = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        reason = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        reason = "Zipfile size"
    else:
        return
    raise LargeZipFile(reason +
                       " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001707
def write(self, filename, arcname=None,
          compress_type=None, compresslevel=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    compress_type and compresslevel override the archive-wide settings
    for this member when given. A directory is recorded as a header
    only. Raises ValueError when the archive is closed or a writing
    handle is currently open.
    """
    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname,
                              strict_timestamps=self._strict_timestamps)

    if not zinfo.is_dir():
        # Regular file: settle the compression settings, then stream the
        # file contents through a writing handle.
        zinfo.compress_type = (self.compression if compress_type is None
                               else compress_type)
        zinfo._compresslevel = (self.compresslevel if compresslevel is None
                                else compresslevel)
        with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
            shutil.copyfileobj(src, dest, 1024*8)
        return

    # Directory: there is no data, so only a local header is written.
    zinfo.compress_size = 0
    zinfo.CRC = 0
    with self._lock:
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        # The central directory will now start after this header.
        self.start_dir = self.fp.tell()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001756
def writestr(self, zinfo_or_arcname, data,
             compress_type=None, compresslevel=None):
    """Write a file into the archive. The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When a name is given, a ZipInfo is created with the current local
    time and the archive-wide compression settings; a name ending in
    '/' gets directory attributes. compress_type and compresslevel,
    when given, override the settings on the ZipInfo. Raises ValueError
    when the archive is closed or a writing handle is currently open.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
        # endswith() rather than indexing [-1]: an empty archive name
        # must not raise IndexError here.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16  # drwxrwxr-x
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16  # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    if compresslevel is not None:
        zinfo._compresslevel = compresslevel

    zinfo.file_size = len(data)  # Uncompressed size
    with self._lock:
        with self.open(zinfo, mode='w') as dest:
            dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001797
def __del__(self):
    """Finalizer: call close() in case the user never did."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001801
def close(self):
    """Close the file, and for mode 'w', 'x' and 'a' write the ending
    records.

    Does nothing when the archive is already closed. Raises ValueError
    while a writing handle is still open. The underlying file is
    released through _fpclose() even if writing the ending records
    fails.
    """
    if self.fp is None:
        return

    if self._writing:
        raise ValueError("Can't close the ZIP file while there is "
                         "an open writing handle on it. "
                         "Close the writing handle before closing the zip.")

    try:
        needs_end_records = self.mode in ('w', 'x', 'a') and self._didModify
        if needs_end_records:
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        # Detach the file object first so the archive reads as closed,
        # then drop this object's reference to it.
        handle = self.fp
        self.fp = None
        self._fpclose(handle)
1823
def _write_end_record(self):
    """Write the central directory and the end-of-archive records.

    One central directory record is written to ``self.fp`` for every
    entry in ``self.filelist``, followed (when required) by the ZIP64
    end-of-archive record and locator, then the end-of-archive record
    and the archive comment. The file is flushed at the end.

    Raises LargeZipFile when ZIP64 end records are required but
    ``self._allowZip64`` is false.
    """
    for zinfo in self.filelist:  # write central directory
        dt = zinfo.date_time
        # Pack the date as (year - 1980) << 9 | month << 5 | day and the
        # time as hour << 11 | minute << 5 | (second // 2).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        # Values too large for their fixed-width fields; they go into a
        # ZIP64 extra field and the fixed field gets 0xffffffff instead.
        extra = []
        if zinfo.file_size > ZIP64_LIMIT \
           or zinfo.compress_size > ZIP64_LIMIT:
            extra.append(zinfo.file_size)
            extra.append(zinfo.compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
        else:
            file_size = zinfo.file_size
            compress_size = zinfo.compress_size

        if zinfo.header_offset > ZIP64_LIMIT:
            extra.append(zinfo.header_offset)
            header_offset = 0xffffffff
        else:
            header_offset = zinfo.header_offset

        extra_data = zinfo.extra
        min_version = 0
        if extra:
            # Prepend a ZIP64 field (header id 1) to the extra data,
            # after stripping any existing field with that id.
            extra_data = _strip_extra(extra_data, (1,))
            extra_data = struct.pack(
                '<HH' + 'Q'*len(extra),
                1, 8*len(extra), *extra) + extra_data

            min_version = ZIP64_VERSION

        # Some compression methods raise the minimum version recorded.
        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
        except DeprecationWarning:
            # Debugging aid: dump the values that were being packed to
            # stderr, then re-raise.
            print((structCentralDir, stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset), file=sys.stderr)
            raise
        self.fp.write(centdir)
        self.fp.write(filename)
        self.fp.write(extra_data)
        self.fp.write(zinfo.comment)

    # Position just past the last central directory record.
    pos2 = self.fp.tell()
    # Write end-of-zip-archive record
    centDirCount = len(self.filelist)
    centDirSize = pos2 - self.start_dir
    centDirOffset = self.start_dir
    requires_zip64 = None
    if centDirCount > ZIP_FILECOUNT_LIMIT:
        requires_zip64 = "Files count"
    elif centDirOffset > ZIP64_LIMIT:
        requires_zip64 = "Central directory offset"
    elif centDirSize > ZIP64_LIMIT:
        requires_zip64 = "Central directory size"
    if requires_zip64:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
        zip64endrec = struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, centDirCount, centDirCount,
            centDirSize, centDirOffset)
        self.fp.write(zip64endrec)

        # The locator carries pos2, which is where the ZIP64
        # end-of-archive record was just written.
        zip64locrec = struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, pos2, 1)
        self.fp.write(zip64locrec)
        # Clamp the values to what the classic end record's fields hold.
        centDirCount = min(centDirCount, 0xFFFF)
        centDirSize = min(centDirSize, 0xFFFFFFFF)
        centDirOffset = min(centDirOffset, 0xFFFFFFFF)

    endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, centDirCount, centDirCount,
                         centDirSize, centDirOffset, len(self._comment))
    self.fp.write(endrec)
    self.fp.write(self._comment)
    self.fp.flush()
1925
def _fpclose(self, fp):
    """Drop one reference to the underlying file, closing it when unused.

    'fp' is closed only once the reference count reaches zero and the
    ``_filePassed`` flag is false.
    """
    assert self._fileRefCnt > 0
    self._fileRefCnt -= 1
    if self._fileRefCnt or self._filePassed:
        return
    fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001931
1932
1933class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001934 """Class to create ZIP archives with Python library files and packages."""
1935
def __init__(self, file, mode="r", compression=ZIP_STORED,
             allowZip64=True, optimize=-1):
    """Initialise the archive via ZipFile and store 'optimize'.

    file, mode, compression and allowZip64 are forwarded to
    ZipFile.__init__; optimize is kept as ``self._optimize``.
    """
    base_options = dict(mode=mode, compression=compression,
                        allowZip64=allowZip64)
    ZipFile.__init__(self, file, **base_options)
    self._optimize = optimize
1941
Christian Tismer59202e52013-10-21 03:59:23 +02001942 def writepy(self, pathname, basename="", filterfunc=None):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001943 """Add all files from "pathname" to the ZIP archive.
1944
Fred Drake484d7352000-10-02 21:14:52 +00001945 If pathname is a package directory, search the directory and
1946 all package subdirectories recursively for all *.py and enter
1947 the modules into the archive. If pathname is a plain
1948 directory, listdir *.py and enter all modules. Else, pathname
1949 must be a Python *.py file and the module will be put into the
Brett Cannonf299abd2015-04-13 14:21:02 -04001950 archive. Added modules are always module.pyc.
Fred Drake484d7352000-10-02 21:14:52 +00001951 This method will compile the module.py into module.pyc if
1952 necessary.
Christian Tismer59202e52013-10-21 03:59:23 +02001953 If filterfunc(pathname) is given, it is called with every argument.
1954 When it is False, the file or directory is skipped.
Fred Drake484d7352000-10-02 21:14:52 +00001955 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001956 pathname = os.fspath(pathname)
Christian Tismer59202e52013-10-21 03:59:23 +02001957 if filterfunc and not filterfunc(pathname):
1958 if self.debug:
Christian Tismer410d9312013-10-22 04:09:28 +02001959 label = 'path' if os.path.isdir(pathname) else 'file'
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001960 print('%s %r skipped by filterfunc' % (label, pathname))
Christian Tismer59202e52013-10-21 03:59:23 +02001961 return
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001962 dir, name = os.path.split(pathname)
1963 if os.path.isdir(pathname):
1964 initname = os.path.join(pathname, "__init__.py")
1965 if os.path.isfile(initname):
1966 # This is a package directory, add it
1967 if basename:
1968 basename = "%s/%s" % (basename, name)
1969 else:
1970 basename = name
1971 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001972 print("Adding package in", pathname, "as", basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001973 fname, arcname = self._get_codename(initname[0:-3], basename)
1974 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001975 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001976 self.write(fname, arcname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01001977 dirlist = sorted(os.listdir(pathname))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001978 dirlist.remove("__init__.py")
1979 # Add all *.py files and package subdirectories
1980 for filename in dirlist:
1981 path = os.path.join(pathname, filename)
1982 root, ext = os.path.splitext(filename)
1983 if os.path.isdir(path):
1984 if os.path.isfile(os.path.join(path, "__init__.py")):
1985 # This is a package directory, add it
Christian Tismer59202e52013-10-21 03:59:23 +02001986 self.writepy(path, basename,
1987 filterfunc=filterfunc) # Recursive call
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001988 elif ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02001989 if filterfunc and not filterfunc(path):
1990 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001991 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02001992 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001993 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02001994 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001995 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001996 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001997 self.write(fname, arcname)
1998 else:
1999 # This is NOT a package directory, add its files at top level
2000 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002001 print("Adding files from directory", pathname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01002002 for filename in sorted(os.listdir(pathname)):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002003 path = os.path.join(pathname, filename)
2004 root, ext = os.path.splitext(filename)
2005 if ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02002006 if filterfunc and not filterfunc(path):
2007 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03002008 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02002009 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002010 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02002011 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002012 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002013 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002014 self.write(fname, arcname)
2015 else:
2016 if pathname[-3:] != ".py":
Collin Winterce36ad82007-08-30 01:19:48 +00002017 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02002018 'Files added with writepy() must end with ".py"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002019 fname, arcname = self._get_codename(pathname[0:-3], basename)
2020 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002021 print("Adding file", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002022 self.write(fname, arcname)
2023
2024 def _get_codename(self, pathname, basename):
2025 """Return (filename, archivename) for the path.
2026
Fred Drake484d7352000-10-02 21:14:52 +00002027 Given a module name path, return the correct file path and
2028 archive name, compiling if necessary. For example, given
2029 /python/lib/string, return (/python/lib/string.pyc, string).
2030 """
Georg Brandl8334fd92010-12-04 10:26:46 +00002031 def _compile(file, optimize=-1):
2032 import py_compile
2033 if self.debug:
2034 print("Compiling", file)
2035 try:
2036 py_compile.compile(file, doraise=True, optimize=optimize)
Serhiy Storchaka45c43752013-01-29 20:10:28 +02002037 except py_compile.PyCompileError as err:
Georg Brandl8334fd92010-12-04 10:26:46 +00002038 print(err.msg)
2039 return False
2040 return True
2041
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002042 file_py = pathname + ".py"
2043 file_pyc = pathname + ".pyc"
Brett Cannonf299abd2015-04-13 14:21:02 -04002044 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2045 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2046 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
Georg Brandl8334fd92010-12-04 10:26:46 +00002047 if self._optimize == -1:
2048 # legacy mode: use whatever file is present
Brett Cannonf299abd2015-04-13 14:21:02 -04002049 if (os.path.isfile(file_pyc) and
Georg Brandl8334fd92010-12-04 10:26:46 +00002050 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2051 # Use .pyc file.
2052 arcname = fname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002053 elif (os.path.isfile(pycache_opt0) and
2054 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
Georg Brandl8334fd92010-12-04 10:26:46 +00002055 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2056 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002057 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002058 arcname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002059 elif (os.path.isfile(pycache_opt1) and
2060 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2061 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002062 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002063 fname = pycache_opt1
2064 arcname = file_pyc
2065 elif (os.path.isfile(pycache_opt2) and
2066 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2067 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2068 # file name in the archive.
2069 fname = pycache_opt2
2070 arcname = file_pyc
Barry Warsaw28a691b2010-04-17 00:19:56 +00002071 else:
Georg Brandl8334fd92010-12-04 10:26:46 +00002072 # Compile py into PEP 3147 pyc file.
2073 if _compile(file_py):
Brett Cannonf299abd2015-04-13 14:21:02 -04002074 if sys.flags.optimize == 0:
2075 fname = pycache_opt0
2076 elif sys.flags.optimize == 1:
2077 fname = pycache_opt1
2078 else:
2079 fname = pycache_opt2
2080 arcname = file_pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002081 else:
2082 fname = arcname = file_py
2083 else:
2084 # new mode: use given optimization level
2085 if self._optimize == 0:
Brett Cannonf299abd2015-04-13 14:21:02 -04002086 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002087 arcname = file_pyc
2088 else:
Brett Cannonf299abd2015-04-13 14:21:02 -04002089 arcname = file_pyc
2090 if self._optimize == 1:
2091 fname = pycache_opt1
2092 elif self._optimize == 2:
2093 fname = pycache_opt2
2094 else:
2095 msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2096 raise ValueError(msg)
Georg Brandl8334fd92010-12-04 10:26:46 +00002097 if not (os.path.isfile(fname) and
2098 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2099 if not _compile(file_py, optimize=self._optimize):
2100 fname = arcname = file_py
Barry Warsaw28a691b2010-04-17 00:19:56 +00002101 archivename = os.path.split(arcname)[1]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002102 if basename:
2103 archivename = "%s/%s" % (basename, archivename)
2104 return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002105
2106
def _unique_everseen(iterable, key=None):
    "List unique elements, preserving order. Remember all elements ever seen."
    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
    # unique_everseen('ABBCcAD', str.lower) --> A B C D
    observed = set()
    for item in iterable:
        # Without a key function the element is its own identity marker.
        marker = item if key is None else key(item)
        if marker in observed:
            continue
        observed.add(marker)
        yield item
2123
2124
def _parents(path):
    """
    Given a path with elements separated by
    posixpath.sep, generate all parents of that path.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    lineage = _ancestry(path)
    # The first item of the ancestry is the path itself; drop it so that
    # only the enclosing directories remain.
    return itertools.islice(lineage, 1, None)
2142
2143
def _ancestry(path):
    """
    Given a path with elements separated by
    posixpath.sep, generate all elements of that path

    >>> list(_ancestry('b/d'))
    ['b/d', 'b']
    >>> list(_ancestry('/b/d/'))
    ['/b/d', '/b']
    >>> list(_ancestry('b/d/f/'))
    ['b/d/f', 'b/d', 'b']
    >>> list(_ancestry('b'))
    ['b']
    >>> list(_ancestry(''))
    []
    """
    separator = posixpath.sep
    remaining = path.rstrip(separator)
    # Walk upwards one component at a time; stop once nothing is left or
    # only the root separator remains.
    while remaining and remaining != separator:
        yield remaining
        remaining = posixpath.dirname(remaining)
2164
2165
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    # Template used by __repr__ (name-mangled so subclasses cannot clash).
    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """Wrap `root` (a ZipFile, or anything ZipFile() accepts) at `at`.

        `at` is the member name inside the archive; the empty string
        designates the archive root and a trailing "/" marks a directory.
        """
        self.root = root if isinstance(root, ZipFile) else ZipFile(root)
        self.at = at

    @property
    def open(self):
        """Callable that opens this member: ``root.open`` bound to ``at``."""
        return functools.partial(self.root.open, self.at)

    @property
    def name(self):
        """Final path component, without any trailing slash."""
        return posixpath.basename(self.at.rstrip("/"))

    def read_text(self, *args, **kwargs):
        """Return the member decoded as text.

        Positional and keyword arguments are forwarded to io.TextIOWrapper.
        """
        with self.open() as strm:
            return io.TextIOWrapper(strm, *args, **kwargs).read()

    def read_bytes(self):
        """Return the raw bytes of the member."""
        with self.open() as strm:
            return strm.read()

    def _is_child(self, path):
        """Return True if `path` sits directly inside this directory."""
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        """Build a Path for `at` that shares this object's archive."""
        return Path(self.root, at)

    def is_dir(self):
        """Return True for the archive root and for names ending in "/"."""
        return not self.at or self.at.endswith("/")

    def is_file(self):
        """Return True whenever is_dir() is false."""
        return not self.is_dir()

    def exists(self):
        """Return True if `at` is a listed member or an implied directory."""
        return self.at in self._names()

    def iterdir(self):
        """Iterate over the Paths directly contained in this directory.

        Raises ValueError if this Path is not a directory.
        """
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self._names())
        return filter(self._is_child, subs)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, add):
        """Return the Path obtained by appending `add` to this one.

        When only the directory form (with trailing "/") of the joined
        name is present in the archive, that form is used, so the result
        is recognised as a directory.
        """
        next = posixpath.join(self.at, add)
        next_dir = posixpath.join(self.at, add, "")
        names = self._names()
        return self._next(next_dir if next not in names and next_dir in names else next)

    __truediv__ = joinpath

    @staticmethod
    def _implied_dirs(names):
        """Generate directory names implied by `names` but absent from it.

        Each missing parent is produced once, with a trailing "/", in
        order of first appearance.
        """
        # Build the lookup set once: testing membership against the list
        # for every parent of every entry is quadratic in the number of
        # archive members.
        known = set(names)
        return _unique_everseen(
            parent + "/"
            for name in names
            for parent in _parents(name)
            if parent + "/" not in known
        )

    @classmethod
    def _add_implied_dirs(cls, names):
        """Return `names` followed by the directories it implies."""
        return names + list(cls._implied_dirs(names))

    @property
    def parent(self):
        """Path of the containing directory (the root for top-level names)."""
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            # Directories are always addressed with a trailing slash.
            parent_at += '/'
        return self._next(parent_at)

    def _names(self):
        """Return every member name, including implied directories."""
        return self._add_implied_dirs(self.root.namelist())
2310
2311
def main(args=None):
    """Run the zipfile command-line interface.

    `args` is the list of command-line arguments (argparse falls back to
    sys.argv[1:] when it is None).  Exactly one of -l/--list, -e/--extract,
    -c/--create or -t/--test must be supplied.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for zipfile module.')
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument('-l', '--list', metavar='<zipfile>',
                         help='Show listing of a zipfile')
    actions.add_argument('-e', '--extract', nargs=2,
                         metavar=('<zipfile>', '<output_dir>'),
                         help='Extract zipfile into target dir')
    actions.add_argument('-c', '--create', nargs='+',
                         metavar=('<name>', '<file>'),
                         help='Create zipfile from sources')
    actions.add_argument('-t', '--test', metavar='<zipfile>',
                         help='Test if a zipfile is valid')
    args = parser.parse_args(args)

    if args.test is not None:
        with ZipFile(args.test, 'r') as archive:
            badfile = archive.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")
        return

    if args.list is not None:
        with ZipFile(args.list, 'r') as archive:
            archive.printdir()
        return

    if args.extract is not None:
        src, curdir = args.extract
        with ZipFile(src, 'r') as archive:
            archive.extractall(curdir)
        return

    if args.create is None:
        return

    # First value is the archive to create, the rest are its sources.
    zip_name, *sources = args.create

    def add_to_zip(archive, path, zippath):
        # Files are stored deflated; directories are recorded (unless they
        # map to the archive root) and then walked in sorted order.
        if os.path.isfile(path):
            archive.write(path, zippath, ZIP_DEFLATED)
        elif os.path.isdir(path):
            if zippath:
                archive.write(path, zippath)
            for entry in sorted(os.listdir(path)):
                add_to_zip(archive,
                           os.path.join(path, entry),
                           os.path.join(zippath, entry))
        # anything else is ignored

    with ZipFile(zip_name, 'w') as archive:
        for path in sources:
            # Use the last path component as the archive name, looking one
            # level up when the path ends with a separator.
            zippath = (os.path.basename(path)
                       or os.path.basename(os.path.dirname(path)))
            if zippath in ('', os.curdir, os.pardir):
                zippath = ''
            add_to_zip(archive, path, zippath)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002371
# Run the command-line interface when executed as a script.
if __name__ == "__main__":
    main()