"""
Read and write ZIP files.

XXX references to utf-8 need further investigation.
"""
import binascii
import functools
import importlib.util
import io
import itertools
import os
import posixpath
import shutil
import stat
import struct
import sys
import threading
import time

# The compression back ends are optional: each name is bound to None when the
# corresponding module cannot be imported, so callers can test availability.
try:
    import zlib  # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    # Without zlib, fall back to the CRC-32 implementation in binascii.
    crc32 = binascii.crc32

try:
    import bz2  # We may need its compression method
except ImportError:
    bz2 = None

try:
    import lzma  # We may need its compression method
except ImportError:
    lzma = None

# Public names exported by "from <module> import *".
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]

class BadZipFile(Exception):
    """Raised for archives that are corrupt or use unsupported features."""


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """

error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names


# Size/count thresholds; FileHeader() switches to (or demands) the ZIP64
# extension once a size exceeds ZIP64_LIMIT.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "Version needed to extract" values written into headers.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

_DD_SIGNATURE = 0x08074b50

Serhiy Storchaka9bdb7be2018-09-17 15:36:40 +0300167_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
168
169def _strip_extra(extra, xids):
170 # Remove Extra Fields with specified IDs.
171 unpack = _EXTRA_FIELD_STRUCT.unpack
172 modified = False
173 buffer = []
174 start = i = 0
175 while i + 4 <= len(extra):
176 xid, xlen = unpack(extra[i : i + 4])
177 j = i + 4 + xlen
178 if xid in xids:
179 if i != start:
180 buffer.append(extra[start : i])
181 start = j
182 modified = True
183 i = j
184 if not modified:
185 return extra
186 return b''.join(buffer)
187
def _check_zipfile(fp):
    """Return True if the open binary file *fp* has a ZIP end record.

    An OSError raised while reading is treated as "not a ZIP file".
    """
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except OSError:
        pass
    return False

def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    Returns False (rather than raising) when the file cannot be opened
    or read.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            # Already a file-like object: inspect it directly.
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except OSError:
        pass
    return result

def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is a seekable binary file, offset is the (negative) position of the
    classic end-of-central-directory record relative to the end of the file,
    and endrec is the list built from that record.  endrec is returned
    unchanged when no valid ZIP64 locator/record precedes it; otherwise its
    leading fields are overwritten in place with the ZIP64 values.

    Raises BadZipFile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec


def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
        data[0:4] == stringEndArchive and
        data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None


class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named *filename* stamped with *date_time*.

        date_time is a (year, month, day, hour, min, sec) sequence.
        Raises ValueError when the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        self.compress_size = 0          # Size of the compressed file
        self.file_size = 0              # Size of the uncompressed file
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file

    def __repr__(self):
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The high 16 bits hold the Unix mode, the low 16 bits the rest.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        zip64 selects whether a ZIP64 extra record is appended; when None it
        is enabled automatically if either size exceeds ZIP64_LIMIT.  Raises
        LargeZipFile if a size exceeds the limit while zip64 is false.
        As a side effect, extract_version and create_version are raised to
        the minimum version the entry requires.
        """
        dt = self.date_time
        # Pack the timestamp into MS-DOS date/time words (2-second resolution).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        ASCII is used when possible; otherwise the name is encoded as UTF-8
        and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply ZIP64 values found in self.extra to this entry.

        Each 0x0001 record supplies, in order, replacements for whichever of
        file_size, compress_size and header_offset currently hold the
        0xffffffff placeholder.  Raises BadZipFile on a malformed record.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        self.file_size = counts[idx]
                        idx += 1

                    if self.compress_size == 0xFFFFFFFF:
                        self.compress_size = counts[idx]
                        idx += 1

                    if self.header_offset == 0xffffffff:
                        self.header_offset = counts[idx]
                        idx += 1
                except IndexError:
                    # The record carries fewer values than the placeholders
                    # in the header require: report it as a corrupt archive
                    # instead of leaking an IndexError.
                    raise BadZipFile(
                        "Corrupt extra field %04x (size=%d)" % (tp, ln)) from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).

        When strict_timestamps is false, modification years outside
        1980..2107 are clamped to the nearest representable timestamp.
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        # NOTE(review): this raises IndexError if arcname consists solely of
        # path separators (e.g. the filesystem root); behavior left as-is.
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        # endswith() rather than [-1] so an empty filename yields False
        # instead of raising IndexError.
        return self.filename.endswith('/')


Serhiy Storchaka06e52252017-03-30 19:09:08 +0300540# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
541# internal keys. We noticed that a direct implementation is faster than
542# relying on binascii.crc32().
Thomas Wouterscf297e42007-02-23 15:07:44 +0000543
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300544_crctable = None
545def _gen_crc(crc):
546 for j in range(8):
547 if crc & 1:
548 crc = (crc >> 1) ^ 0xEDB88320
549 else:
550 crc >>= 1
551 return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000552
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300553# ZIP supports a password-based form of encryption. Even though known
554# plaintext attacks have been found against it, it is still useful
555# to be able to get data out of such a file.
556#
557# Usage:
558# zd = _ZipDecrypter(mypwd)
559# plain_bytes = zd(cypher_bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000560
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300561def _ZipDecrypter(pwd):
562 key0 = 305419896
563 key1 = 591751049
564 key2 = 878082192
Thomas Wouterscf297e42007-02-23 15:07:44 +0000565
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300566 global _crctable
567 if _crctable is None:
568 _crctable = list(map(_gen_crc, range(256)))
569 crctable = _crctable
Thomas Wouterscf297e42007-02-23 15:07:44 +0000570
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300571 def crc32(ch, crc):
Thomas Wouterscf297e42007-02-23 15:07:44 +0000572 """Compute the CRC32 primitive on one byte."""
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300573 return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000574
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300575 def update_keys(c):
576 nonlocal key0, key1, key2
577 key0 = crc32(c, key0)
578 key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
579 key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
580 key2 = crc32(key1 >> 24, key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000581
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300582 for p in pwd:
583 update_keys(p)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000584
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300585 def decrypter(data):
586 """Decrypt a bytes object."""
587 result = bytearray()
588 append = result.append
589 for c in data:
590 k = key2 | 2
591 c ^= ((k * (k^1)) >> 8) & 0xFF
592 update_keys(c)
593 append(c)
594 return bytes(result)
595
596 return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000597
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200598
class LZMACompressor:
    """Raw LZMA1 compressor whose output starts with the ZIP LZMA header.

    The header (bytes 9, 4, a 16-bit properties length, then the filter
    properties) is emitted in front of the first data returned by
    compress() or flush().
    """

    def __init__(self):
        self._comp = None

    def _init(self):
        # Create the underlying compressor and return the header bytes.
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
            lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        ])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        if self._comp is None:
            return self._init() + self._comp.compress(data)
        return self._comp.compress(data)

    def flush(self):
        if self._comp is None:
            return self._init() + self._comp.flush()
        return self._comp.flush()


class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by the ZIP LZMA header.

    Input is buffered until the 4-byte header and the filter properties it
    announces are available; until then decompress() returns b''.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        if self._decomp is None:
            self._unconsumed += data
            if len(self._unconsumed) <= 4:
                return b''
            # Bytes 2-3 of the header hold the length of the properties.
            psize, = struct.unpack('<H', self._unconsumed[2:4])
            if len(self._unconsumed) <= 4 + psize:
                return b''

            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
                lzma._decode_filter_properties(lzma.FILTER_LZMA1,
                                               self._unconsumed[4:4 + psize])
            ])
            data = self._unconsumed[4 + psize:]
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result


# Human-readable names for ZIP compression method numbers (used by
# ZipInfo.__repr__); includes methods this module cannot itself handle.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}

def _check_compression(compression):
    """Validate that *compression* is a usable compression method.

    Raises RuntimeError when the method is known but its supporting module
    (zlib, bz2 or lzma) could not be imported, and NotImplementedError for
    any other method number.  Returns None on success.
    """
    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
    else:
        raise NotImplementedError("That compression method is not supported")


def _get_compressor(compress_type, compresslevel=None):
    """Return a new compressor object for *compress_type*, or None.

    *compresslevel* is forwarded to the deflate and bzip2 compressors when
    it is not None; it is ignored for ZIP_LZMA.  Any other compression
    type yields None.
    """
    if compress_type == ZIP_DEFLATED:
        if compresslevel is None:
            level = zlib.Z_DEFAULT_COMPRESSION
        else:
            level = compresslevel
        # Negative wbits (-15) is what the original passes for ZIP members.
        return zlib.compressobj(level, zlib.DEFLATED, -15)

    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)

    # compresslevel is ignored for ZIP_LZMA
    if compress_type == ZIP_LZMA:
        return LZMACompressor()

    return None
703
704
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    Returns None for ZIP_STORED (no decompression needed).

    Raises:
        NotImplementedError: the compression type is not supported.  For
            integer ids the message includes the id and, when known, its
            name from ``compressor_names``.
        RuntimeError: the type is supported but its module is missing
            (raised by ``_check_compression``).
    """
    # Report unknown ids descriptively *before* the generic check:
    # _check_compression() raises a plain NotImplementedError for every
    # unknown method, which previously made the descriptive branch
    # unreachable.
    supported = (ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2, ZIP_LZMA)
    if isinstance(compress_type, int) and compress_type not in supported:
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError(
                "compression type %d (%s)" % (compress_type, descr))
        raise NotImplementedError("compression type %d" % (compress_type,))

    # Non-integer values and missing modules are still handled here.
    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    elif compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    elif compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    else:
        # Only ZIP_LZMA can reach this point after the checks above.
        return LZMADecompressor()
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200721
722
class _SharedFile:
    """Handle on a file object that may be shared by several readers.

    Each handle remembers its own position (``_pos``) and repositions the
    shared file under ``lock`` before every read, so handles do not
    disturb each other.

    Parameters:
        file:    the shared underlying file object.
        pos:     this handle's starting position in *file*.
        close:   callable invoked with the file object by close().
        lock:    context manager guarding access to *file*.
        writing: callable returning true while a writing handle is open;
                 reads and seeks raise ValueError in that state.
    """

    def __init__(self, file, pos, close, lock, writing):
        self._file = file
        self._pos = pos
        self._close = close
        self._lock = lock
        self._writing = writing
        self.seekable = file.seekable

    def tell(self):
        """Return this handle's own position.

        The shared file's position cannot be used: another handle may
        have moved it since this handle last read or seeked.
        """
        return self._pos

    def seek(self, offset, whence=0):
        """Move this handle's position and return the new position."""
        with self._lock:
            if self._writing():
                raise ValueError("Can't reposition in the ZIP file while "
                        "there is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            if whence == os.SEEK_CUR:
                # Relative to *our* position, not wherever the shared
                # file happens to be right now.
                self._file.seek(self._pos + offset)
            else:
                self._file.seek(offset, whence)
            self._pos = self._file.tell()
            return self._pos

    def read(self, n=-1):
        """Read up to *n* bytes starting at this handle's position."""
        with self._lock:
            if self._writing():
                raise ValueError("Can't read from the ZIP file while there "
                        "is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            self._file.seek(self._pos)
            data = self._file.read(n)
            self._pos = self._file.tell()
            return data

    def close(self):
        """Release the underlying file through the close callback (once)."""
        if self._file is not None:
            fileobj = self._file
            self._file = None
            self._close(fileobj)
759
# Provide the tell method for unseekable stream
class _Tellable:
    """Write-side wrapper that tracks how many bytes have been written.

    ``tell()`` reports the running total of the values returned by the
    wrapped object's ``write()``; flush() and close() are forwarded.
    """

    def __init__(self, fp):
        self.fp = fp
        self.offset = 0     # sum of write() results so far

    def write(self, data):
        """Write *data* to the wrapped object and return its result."""
        written = self.fp.write(data)
        self.offset += written
        return written

    def tell(self):
        """Return the number of bytes written through this wrapper."""
        return self.offset

    def flush(self):
        """Flush the wrapped object."""
        self.fp.flush()

    def close(self):
        """Close the wrapped object."""
        self.fp.close()
779
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200780
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # Parenthesised on purpose: "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, pwd=None,
                 close_fileobj=False):
        """Set up reading of one member.

        fileobj:       file object positioned at the member's data.
        mode:          stored as the ``mode`` attribute.
        zipinfo:       supplies compress_type, compress_size, file_size,
                       filename and (if present) CRC.
        pwd:           password bytes; when true the 12-byte encryption
                       header is read and checked immediately.
        close_fileobj: whether close() also closes *fileobj*.

        Raises RuntimeError when the password check byte does not match.
        """
        self._fileobj = fileobj
        self._pwd = pwd
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size   # compressed bytes not yet read
        self._left = zipinfo.file_size                # uncompressed bytes not yet produced

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0       # index of the next unread byte in _readbuffer

        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        # Remember the starting state so seek() can rewind and re-read.
        # Any AttributeError raised in this block (for example a fileobj
        # without seekable()) leaves the member non-seekable.
        self._seekable = False
        try:
            if fileobj.seekable():
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass

        self._decrypter = None
        if pwd:
            if zipinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zipinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zipinfo.CRC >> 24) & 0xff
            h = self._init_decrypter()
            if h != check_byte:
                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)

    def _init_decrypter(self):
        """Create the decrypter, consume the encryption header and
        return its last (check) byte."""
        self._decrypter = _ZipDecrypter(self._pwd)
        # The first 12 bytes in the cypher stream is an encryption header
        #  used to strengthen the algorithm. The first 11 bytes are
        #  completely random, while the 12th contains the MSB of the CRC,
        #  or the MSB of the file time depending on the header type
        #  and is used to check the correctness of the password.
        header = self._fileobj.read(12)
        self._compress_left -= 12
        return self._decrypter(header)[11]

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Push the bytes just read back in front of the buffer.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than requested: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n=-1):
        """Read up to n bytes with at most one read() system call.

        If n is omitted, None, or negative, the buffered bytes plus at
        most one non-empty chunk of newly read data are returned.
        """

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read raw (still compressed) bytes from the file object and
        # decrypt them when a decrypter is active.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            # The file ended before the declared compressed size was read.
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        return self._seekable

    def seek(self, offset, whence=0):
        """Move to a position in the uncompressed data and return it.

        The target is clamped to the range [0, file size].  Seeking
        backwards past the buffered data restarts reading from the
        beginning of the member; seeking forwards reads and discards data.

        Raises io.UnsupportedOperation if the underlying stream is not
        seekable and ValueError for an invalid *whence*.
        """
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        # Skip forward by reading and discarding, in bounded chunks.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        """Return the current position in the uncompressed data.

        Raises io.UnsupportedOperation if the underlying stream is not
        seekable.
        """
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        # Bytes produced so far, minus those still waiting in the buffer.
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos
1098
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001099
class _ZipWriteFile(io.BufferedIOBase):
    """Writable file object for adding one member to an open ZipFile.

    Data passed to write() is CRC-summed, optionally compressed and
    written to the archive's file object; close() finalises the sizes
    and CRC on the ZipInfo and registers the member with the archive.
    """

    def __init__(self, zf, zinfo, zip64):
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0        # uncompressed bytes written
        self._compress_size = 0    # compressed bytes written
        self._crc = 0

    @property
    def _fileobj(self):
        # Looked up on every access rather than cached at construction.
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Write *data* to the member and return the number of bytes consumed.

        *data* may be any object supporting the buffer protocol.
        Raises ValueError if the file is already closed.
        """
        if self.closed:
            raise ValueError('I/O operation on closed file.')

        # Count bytes, not items: len() of e.g. array('H') is the number
        # of elements, which would record a wrong file size.
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes

        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        """Finish the member: flush the compressor, record CRC and sizes,
        write the data descriptor or rewrite the header, and register
        the member with the archive.

        Raises RuntimeError if a size exceeds ZIP64_LIMIT while ZIP64 is
        not enabled for this member.  The archive's ``_writing`` flag is
        cleared even when an error occurs.
        """
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'File size unexpectedly exceeded ZIP64 limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'Compressed size unexpectedly exceeded ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            self._zipfile._writing = False
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001175
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001176
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001177
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001178class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001179 """ Class with methods to open, read, write, close, list zip files.
1180
Bo Baylesce237c72018-01-29 23:54:07 -06001181 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1182 compresslevel=None)
Tim Petersa19a1682001-03-29 04:36:09 +00001183
Fred Drake3d9091e2001-03-26 15:49:24 +00001184 file: Either the path to the file, or a file-like object.
1185 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001186 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1187 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001188 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1189 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001190 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1191 needed, otherwise it will raise an exception when this would
1192 be necessary.
Bo Baylesce237c72018-01-29 23:54:07 -06001193 compresslevel: None (default for the given compression type) or an integer
1194 specifying the level to pass to the compressor.
1195 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1196 When using ZIP_DEFLATED integers 0 through 9 are accepted.
1197 When using ZIP_BZIP2 integers 1 through 9 are accepted.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001198
Fred Drake3d9091e2001-03-26 15:49:24 +00001199 """
Fred Drake484d7352000-10-02 21:14:52 +00001200
Fred Drake90eac282001-02-28 05:29:34 +00001201 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001202 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001203
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
             compresslevel=None, *, strict_timestamps=True):
    """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
    or append 'a'."""
    if mode not in ('r', 'w', 'x', 'a'):
        raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

    _check_compression(compression)

    # Options supplied by the caller.
    self.mode = mode
    self.compression = compression  # Method of compression
    self.compresslevel = compresslevel
    self._allowZip64 = allowZip64
    self._strict_timestamps = strict_timestamps

    # Initial archive state.
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.pwd = None
    self._comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, os.PathLike):
        file = os.fspath(file)
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        # First four entries: open mode for each ZipFile mode.  Remaining
        # entries: the mode to retry with after an OSError.
        next_mode = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                     'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
        filemode = next_mode[mode]
        while True:
            try:
                self.fp = io.open(file, filemode)
            except OSError:
                if filemode not in next_mode:
                    # No further fallback: report the failure.
                    raise
                filemode = next_mode[filemode]
            else:
                break
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    self._fileRefCnt = 1
    self._lock = threading.RLock()
    self._seekable = True
    self._writing = False

    try:
        if mode == 'r':
            self._RealGetContents()
        elif mode in ('w', 'x'):
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
            try:
                self.start_dir = self.fp.tell()
            except (AttributeError, OSError):
                # tell() is unavailable: count written bytes ourselves.
                self.fp = _Tellable(self.fp)
                self.start_dir = 0
                self._seekable = False
            else:
                # Some file-like objects can provide tell() but not seek()
                try:
                    self.fp.seek(self.start_dir)
                except (AttributeError, OSError):
                    self._seekable = False
        elif mode == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                self.start_dir = self.fp.tell()
        else:
            raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
    except BaseException:
        # Release the file on any failure, then let the error propagate.
        failed_fp = self.fp
        self.fp = None
        self._fpclose(failed_fp)
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001293
def __enter__(self):
    """Enter a ``with`` block; the object itself is the context value."""
    archive = self
    return archive
1296
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block: close the object.

    Returns None, so an exception raised inside the block propagates.
    """
    self.close()
    return None
1299
def __repr__(self):
    """Return ``<module.Class ...>`` describing the file and mode,
    or ``<module.Class [closed]>`` when ``fp`` is None."""
    cls = self.__class__
    prefix = '<%s.%s' % (cls.__module__, cls.__qualname__)
    if self.fp is None:
        return prefix + ' [closed]' + '>'

    if self._filePassed:
        source = ' file=%r' % self.fp
    elif self.filename is not None:
        source = ' filename=%r' % self.filename
    else:
        source = ''
    return prefix + source + ' mode=%r' % self.mode + '>'
1313
Tim Peters7d3bad62001-04-04 18:56:49 +00001314 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +00001315 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001316 fp = self.fp
Georg Brandl268e4d42010-10-14 06:59:45 +00001317 try:
1318 endrec = _EndRecData(fp)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001319 except OSError:
Georg Brandl4d540882010-10-28 06:42:33 +00001320 raise BadZipFile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001321 if not endrec:
Georg Brandl4d540882010-10-28 06:42:33 +00001322 raise BadZipFile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001323 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001324 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001325 size_cd = endrec[_ECD_SIZE] # bytes in central directory
1326 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
R David Murrayf50b38a2012-04-12 18:44:58 -04001327 self._comment = endrec[_ECD_COMMENT] # archive comment
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001328
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001329 # "concat" is zero, unless zip was concatenated to another file
1330 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +00001331 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1332 # If Zip64 extension structures are present, account for them
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001333 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001334
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001335 if self.debug > 2:
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001336 inferred = concat + offset_cd
1337 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001338 # self.start_dir: Position of start of central directory
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001339 self.start_dir = offset_cd + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001340 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001341 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001342 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001343 total = 0
1344 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001345 centdir = fp.read(sizeCentralDir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001346 if len(centdir) != sizeCentralDir:
1347 raise BadZipFile("Truncated central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001348 centdir = struct.unpack(structCentralDir, centdir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001349 if centdir[_CD_SIGNATURE] != stringCentralDir:
1350 raise BadZipFile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001351 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001352 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +00001353 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001354 flags = centdir[5]
1355 if flags & 0x800:
1356 # UTF-8 file names extension
1357 filename = filename.decode('utf-8')
1358 else:
1359 # Historical ZIP filename encoding
1360 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001361 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001362 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +00001363 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1364 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001365 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001366 (x.create_version, x.create_system, x.extract_version, x.reserved,
Christian Tismer59202e52013-10-21 03:59:23 +02001367 x.flag_bits, x.compress_type, t, d,
1368 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
Martin v. Löwisd099b562012-05-01 14:08:22 +02001369 if x.extract_version > MAX_EXTRACT_VERSION:
1370 raise NotImplementedError("zip file version %.1f" %
1371 (x.extract_version / 10))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001372 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1373 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +00001374 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001375 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Christian Tismer59202e52013-10-21 03:59:23 +02001376 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001377
1378 x._decodeExtra()
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001379 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001380 self.filelist.append(x)
1381 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001382
1383 # update total bytes read from central directory
1384 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1385 + centdir[_CD_EXTRA_FIELD_LENGTH]
1386 + centdir[_CD_COMMENT_LENGTH])
1387
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001388 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001389 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001390
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001391
def namelist(self):
    """Return the member file names, in the order held by self.filelist."""
    return [zinfo.filename for zinfo in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001395
def infolist(self):
    """Return the list of ZipInfo instances for files in the archive.

    The list object stored on the archive (self.filelist) is handed back
    directly; no copy is made.
    """
    entries = self.filelist
    return entries
1400
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    A header row is written first, then one row per entry of
    self.filelist showing its name, modification time and uncompressed
    size.  'file' is forwarded to print(); None selects print()'s default
    stream.
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for zinfo in self.filelist:
        # date_time may carry more than six fields; only the first six
        # (year, month, day, hour, minute, second) are formatted.
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001409
def testzip(self):
    """Read all the files and check the CRC.

    Each member is opened by name and read to the end.  The name of the
    first member whose read raises BadZipFile is returned; None is
    returned when every member reads cleanly.
    """
    chunk_size = 2 ** 20
    for zinfo in self.filelist:
        try:
            # Read by chunks, to avoid an OverflowError or a
            # MemoryError with very large embedded files.
            with self.open(zinfo.filename, "r") as member:
                block = member.read(chunk_size)
                while block:     # Check CRC-32
                    block = member.read(chunk_size)
        except BadZipFile:
            return zinfo.filename
    return None
1422
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when self.NameToInfo has no entry for 'name'.
    """
    info = self.NameToInfo.get(name)
    if info is not None:
        return info

    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001431
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A truthy 'pwd' must be a bytes object, otherwise TypeError is raised.
    Any falsy value (None, b"") stores None as the default password.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd or None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001440
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Only bytes are accepted as an archive comment.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
    # Comments longer than ZIP_MAX_COMMENT are truncated; the warning is
    # issued with stacklevel=2 so it points at the assigning code.
    if len(comment) > ZIP_MAX_COMMENT:
        import warnings
        message = ('Archive comment is too long; truncating to %d bytes'
                   % ZIP_MAX_COMMENT)
        warnings.warn(message, stacklevel=2)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    # Mark the archive as modified (close() reads this flag).
    self._didModify = True
1458
def read(self, name, pwd=None):
    """Return file bytes for name.

    The member is opened in mode "r" with 'pwd' forwarded to open(), read
    in full, and closed again before the bytes are returned.
    """
    with self.open(name, "r", pwd) as member:
        payload = member.read()
    return payload
Guido van Rossumd8faa362007-04-27 19:54:29 +00001463
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Return file-like object for 'name'.

    name is a string for the file name within the ZIP file, or a ZipInfo
    object.

    mode should be 'r' to read a file already in the ZIP file, or 'w' to
    write to a file newly added to the archive.

    pwd is the password to decrypt files (only used for reading).

    When writing, if the file size is not known in advance but may exceed
    2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
    files.  If the size is known in advance, it is best to pass a ZipInfo
    instance for name, with zinfo.file_size set.

    Raises ValueError for an unsupported mode, for a password combined
    with mode 'w', for a closed archive, or when reading while a write
    handle is open; TypeError for a non-bytes password; BadZipFile for a
    damaged or mismatching local file header; NotImplementedError for
    flag bits 5 and 6; RuntimeError when an encrypted member is opened
    without any password.
    """
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    if pwd and (mode == "w"):
        raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    # Resolve 'name' to a ZipInfo: use it as-is, create a fresh one for
    # writing, or look up the existing entry for reading.
    writing = mode == 'w'
    if isinstance(name, ZipInfo):
        zinfo = name
    elif writing:
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
    else:
        zinfo = self.getinfo(name)

    if writing:
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                         "is an open writing handle on it. "
                         "Close the writing handle before trying to read.")

    # Open for reading: the shared handle holds one reference on the
    # underlying file until it is closed.
    self._fileRefCnt += 1
    zef_file = _SharedFile(self.fp, zinfo.header_offset,
                           self._fpclose, self._lock, lambda: self._writing)
    try:
        # Read and validate the fixed-size local file header.
        raw_header = zef_file.read(sizeFileHeader)
        if len(raw_header) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        header = struct.unpack(structFileHeader, raw_header)
        if header[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        # The variable-length name follows; the extra field is skipped.
        fname = zef_file.read(header[_FH_FILENAME_LENGTH])
        extra_length = header[_FH_EXTRA_FIELD_LENGTH]
        if extra_length:
            zef_file.read(extra_length)

        flags = zinfo.flag_bits
        if flags & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if flags & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        # Flag bit 11 marks a UTF-8 filename; otherwise cp437 is used.
        encoding = "utf-8" if flags & 0x800 else "cp437"
        fname_str = fname.decode(encoding)

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        if not flags & 0x1:
            # Unencrypted member: any supplied password is ignored.
            pwd = None
        else:
            pwd = pwd or self.pwd
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)

        return ZipExtFile(zef_file, mode, zinfo, pwd, True)
    except BaseException:
        # Release the shared handle on any failure, then propagate.
        zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001561
def _open_to_write(self, zinfo, force_zip64=False):
    """Write the local header for 'zinfo' and return a write handle.

    Raises ValueError when force_zip64 is requested on an archive opened
    without allowZip64, or when another write handle is already open.
    On success the archive is flagged as modified and as having an open
    writer.
    """
    if force_zip64 and not self._allowZip64:
        raise ValueError(
            "force_zip64 is True, but allowZip64 was False when opening "
            "the ZIP file."
        )
    if self._writing:
        raise ValueError("Can't write to the ZIP file while there is "
                         "another write handle open on it. "
                         "Close the first handle before opening another.")

    # Size and CRC are overwritten with correct data after processing the file
    zinfo.compress_size = 0
    zinfo.CRC = 0

    flags = 0x00
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        flags |= 0x02
    if not self._seekable:
        flags |= 0x08
    zinfo.flag_bits = flags

    if not zinfo.external_attr:
        zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

    # Compressed size can be larger than uncompressed size
    zip64 = self._allowZip64 and (
        force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

    # On seekable files the new header goes where the central directory
    # currently starts.
    if self._seekable:
        self.fp.seek(self.start_dir)
    zinfo.header_offset = self.fp.tell()

    self._writecheck(zinfo)
    self._didModify = True

    self.fp.write(zinfo.FileHeader(zip64))

    self._writing = True
    return _ZipWriteFile(self, zinfo, zip64)
1602
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
    using its full name.  Its file information is extracted as accurately
    as possible.  'member' may be a filename or a ZipInfo object.  You can
    specify a different directory using 'path'.

    Returns whatever _extract_member() returns for the member.
    """
    target_dir = os.getcwd() if path is None else os.fspath(path)
    return self._extract_member(member, target_dir, pwd)
1615
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
    directory.  'path' specifies a different directory to extract to.
    'members' is optional and must be a subset of the list returned
    by namelist().
    """
    if members is None:
        members = self.namelist()

    target_dir = os.getcwd() if path is None else os.fspath(path)

    for member in members:
        self._extract_member(member, target_dir, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001632
@classmethod
def _sanitize_windows_name(cls, arcname, pathsep):
    """Replace bad characters and remove trailing dots from parts.

    Each of the characters :<>|"?* becomes an underscore.  The name is
    then split on 'pathsep', trailing dots are stripped from every part,
    parts left empty are dropped, and the rest are rejoined.  The
    translation table is built on first use and cached on the class.
    """
    trans = cls._windows_illegal_name_trans_table
    if not trans:
        bad_chars = ':<>|"?*'
        trans = str.maketrans(bad_chars, '_' * len(bad_chars))
        cls._windows_illegal_name_trans_table = trans
    kept = []
    for part in arcname.translate(trans).split(pathsep):
        # remove trailing dots
        part = part.rstrip('.')
        # drop parts that are (now) empty
        if part:
            kept.append(part)
    return pathsep.join(kept)
1647
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
    file on the path targetpath.

    'member' may also be a member name, which is resolved with getinfo().
    The archive name is made relative and stripped of drive, empty, "."
    and ".." components before being joined onto 'targetpath'.  Missing
    parent directories are created.  Returns the normalized path of the
    extracted file or directory.

    Directory creation tolerates another thread or process creating the
    same directory between the existence check and the creation call.
    """
    if not isinstance(member, ZipInfo):
        member = self.getinfo(member)

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    invalid_path_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                               if x not in invalid_path_parts)
    if os.path.sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, os.path.sep)

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.  exist_ok avoids a
    # spurious FileExistsError when the directory appears after the
    # exists() check (e.g. concurrent extraction into the same tree).
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs, exist_ok=True)

    if member.is_dir():
        if not os.path.isdir(targetpath):
            try:
                os.mkdir(targetpath)
            except FileExistsError:
                # Fine if someone else just created the directory;
                # still an error if a non-directory is in the way.
                if not os.path.isdir(targetpath):
                    raise
        return targetpath

    with self.open(member, pwd=pwd) as source, \
            open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1689
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Warns about a duplicate member name, then raises ValueError when the
    archive is not in a writing mode or is already closed.  The
    compression type is validated, and when ZIP64 is not allowed
    LargeZipFile is raised if the entry would need ZIP64 extensions.
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ('w', 'x', 'a'):
        raise ValueError("write() requires mode 'w', 'x', or 'a'")
    if not self.fp:
        raise ValueError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)

    if self._allowZip64:
        return

    # Without ZIP64, reject anything exceeding the classic format limits.
    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        reason = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        reason = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        reason = "Zipfile size"
    else:
        return
    raise LargeZipFile(reason +
                       " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001712
def write(self, filename, arcname=None,
          compress_type=None, compresslevel=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    'compress_type' and 'compresslevel' override the archive defaults
    for this member when not None; they are not applied to directories.
    A directory is recorded as a header-only entry.  Raises ValueError
    when the archive is closed or a writing handle is still open.
    """
    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname,
                              strict_timestamps=self._strict_timestamps)

    if not zinfo.is_dir():
        # Regular file: settle compression settings, then stream the
        # source file into a write handle.
        zinfo.compress_type = (
            self.compression if compress_type is None else compress_type)
        zinfo._compresslevel = (
            self.compresslevel if compresslevel is None else compresslevel)
        with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
            shutil.copyfileobj(src, dest, 1024*8)
        return

    # Directory entry: no data, only a local header.
    zinfo.compress_size = 0
    zinfo.CRC = 0
    with self._lock:
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        # The central directory now starts after the new header.
        self.start_dir = self.fp.tell()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001761
def writestr(self, zinfo_or_arcname, data,
             compress_type=None, compresslevel=None):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When a name is given, a ZipInfo is created with the current local
    time and the archive's default compression settings; a name ending
    in '/' gets directory attributes.  'compress_type' and
    'compresslevel' override the ZipInfo's settings when not None.
    Raises ValueError when the archive is closed or a writing handle is
    still open.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
        # endswith() rather than indexing [-1], so an empty name does not
        # raise IndexError here.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    if compresslevel is not None:
        zinfo._compresslevel = compresslevel

    zinfo.file_size = len(data)            # Uncompressed size
    with self._lock:
        with self.open(zinfo, mode='w') as dest:
            dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001802
def __del__(self):
    """Finalizer: invoke close() in case the user never did."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001806
def close(self):
    """Close the file, and for mode 'w', 'x' and 'a' write the ending
    records.

    Does nothing when the archive is already closed.  Raises ValueError
    while a writing handle is still open.  The underlying file reference
    is released even if writing the ending records fails.
    """
    if self.fp is None:
        return

    if self._writing:
        raise ValueError("Can't close the ZIP file while there is "
                         "an open writing handle on it. "
                         "Close the writing handle before closing the zip.")

    try:
        # Ending records are only needed for a modified, writable archive.
        needs_end_record = self.mode in ('w', 'x', 'a') and self._didModify
        if needs_end_record:
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        # Detach the file object first so the archive reads as closed,
        # then drop this archive's reference to it.
        handle, self.fp = self.fp, None
        self._fpclose(handle)
1828
def _write_end_record(self):
    """Write the central directory and the end-of-archive records.

    One central directory entry is written to self.fp for every ZipInfo
    in self.filelist, starting at the current file position.  Sizes or
    offsets above ZIP64_LIMIT are stored as 0xffffffff in the fixed
    fields and carried in a ZIP64 extra field (header id 1) instead.
    When the entry count, directory offset or directory size exceeds
    the classic limits, ZIP64 end-of-archive records are written first;
    LargeZipFile is raised if that is needed but ZIP64 is not allowed.
    The archive comment follows the final record and the file is flushed.
    """
    for zinfo in self.filelist:         # write central directory
        stamp = zinfo.date_time
        dosdate = (stamp[0] - 1980) << 9 | stamp[1] << 5 | stamp[2]
        dostime = stamp[3] << 11 | stamp[4] << 5 | (stamp[5] // 2)

        # Values too large for the 32-bit fields, in the order they go
        # into the ZIP64 extra field.
        zip64_values = []
        if zinfo.file_size > ZIP64_LIMIT \
           or zinfo.compress_size > ZIP64_LIMIT:
            zip64_values.append(zinfo.file_size)
            zip64_values.append(zinfo.compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
        else:
            file_size = zinfo.file_size
            compress_size = zinfo.compress_size

        if zinfo.header_offset > ZIP64_LIMIT:
            zip64_values.append(zinfo.header_offset)
            header_offset = 0xffffffff
        else:
            header_offset = zinfo.header_offset

        extra_data = zinfo.extra
        min_version = 0
        if zip64_values:
            # Replace any existing ZIP64 field with a freshly built one,
            # placed in front of the remaining extra data.
            remaining_extra = _strip_extra(extra_data, (1,))
            value_count = len(zip64_values)
            zip64_field = struct.pack(
                '<HH' + 'Q'*value_count,
                1, 8*value_count, *zip64_values)
            extra_data = zip64_field + remaining_extra

            min_version = ZIP64_VERSION

        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(
                structCentralDir,
                stringCentralDir, create_version,
                zinfo.create_system, extract_version, zinfo.reserved,
                flag_bits, zinfo.compress_type, dostime, dosdate,
                zinfo.CRC, compress_size, file_size,
                len(filename), len(extra_data), len(zinfo.comment),
                0, zinfo.internal_attr, zinfo.external_attr,
                header_offset)
        except DeprecationWarning:
            # Dump the values that were being packed, then re-raise.
            debug_fields = (
                structCentralDir, stringCentralDir, create_version,
                zinfo.create_system, extract_version, zinfo.reserved,
                zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                zinfo.CRC, compress_size, file_size,
                len(zinfo.filename), len(extra_data), len(zinfo.comment),
                0, zinfo.internal_attr, zinfo.external_attr,
                header_offset)
            print(debug_fields, file=sys.stderr)
            raise
        self.fp.write(centdir)
        self.fp.write(filename)
        self.fp.write(extra_data)
        self.fp.write(zinfo.comment)

    directory_end = self.fp.tell()
    # Write end-of-zip-archive record
    entry_count = len(self.filelist)
    directory_size = directory_end - self.start_dir
    directory_offset = self.start_dir

    if entry_count > ZIP_FILECOUNT_LIMIT:
        zip64_reason = "Files count"
    elif directory_offset > ZIP64_LIMIT:
        zip64_reason = "Central directory offset"
    elif directory_size > ZIP64_LIMIT:
        zip64_reason = "Central directory size"
    else:
        zip64_reason = None

    if zip64_reason:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(zip64_reason +
                               " would require ZIP64 extensions")
        zip64_end = struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, entry_count, entry_count,
            directory_size, directory_offset)
        self.fp.write(zip64_end)

        # The locator points at the ZIP64 end record, which begins where
        # the central directory ended.
        zip64_locator = struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, directory_end, 1)
        self.fp.write(zip64_locator)
        # Clamp the values stored in the classic end record.
        entry_count = min(entry_count, 0xFFFF)
        directory_size = min(directory_size, 0xFFFFFFFF)
        directory_offset = min(directory_offset, 0xFFFFFFFF)

    end_record = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, entry_count, entry_count,
                             directory_size, directory_offset,
                             len(self._comment))
    self.fp.write(end_record)
    self.fp.write(self._comment)
    self.fp.flush()
1930
def _fpclose(self, fp):
    """Drop one reference to the underlying file object.

    'fp' is closed only when the reference count reaches zero and
    self._filePassed is false.
    """
    assert self._fileRefCnt > 0
    self._fileRefCnt -= 1
    if self._fileRefCnt or self._filePassed:
        return
    fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001936
1937
1938class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001939 """Class to create ZIP archives with Python library files and packages."""
1940
def __init__(self, file, mode="r", compression=ZIP_STORED,
             allowZip64=True, optimize=-1):
    """Initialize the archive via ZipFile.__init__ and record 'optimize'.

    'file', 'mode', 'compression' and 'allowZip64' are forwarded to
    ZipFile.__init__; 'optimize' is stored as self._optimize.
    """
    ZipFile.__init__(
        self, file,
        mode=mode,
        compression=compression,
        allowZip64=allowZip64,
    )
    self._optimize = optimize
1946
Christian Tismer59202e52013-10-21 03:59:23 +02001947 def writepy(self, pathname, basename="", filterfunc=None):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001948 """Add all files from "pathname" to the ZIP archive.
1949
Fred Drake484d7352000-10-02 21:14:52 +00001950 If pathname is a package directory, search the directory and
1951 all package subdirectories recursively for all *.py and enter
1952 the modules into the archive. If pathname is a plain
1953 directory, listdir *.py and enter all modules. Else, pathname
1954 must be a Python *.py file and the module will be put into the
Brett Cannonf299abd2015-04-13 14:21:02 -04001955 archive. Added modules are always module.pyc.
Fred Drake484d7352000-10-02 21:14:52 +00001956 This method will compile the module.py into module.pyc if
1957 necessary.
Christian Tismer59202e52013-10-21 03:59:23 +02001958 If filterfunc(pathname) is given, it is called with every argument.
1959 When it is False, the file or directory is skipped.
Fred Drake484d7352000-10-02 21:14:52 +00001960 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001961 pathname = os.fspath(pathname)
Christian Tismer59202e52013-10-21 03:59:23 +02001962 if filterfunc and not filterfunc(pathname):
1963 if self.debug:
Christian Tismer410d9312013-10-22 04:09:28 +02001964 label = 'path' if os.path.isdir(pathname) else 'file'
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001965 print('%s %r skipped by filterfunc' % (label, pathname))
Christian Tismer59202e52013-10-21 03:59:23 +02001966 return
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001967 dir, name = os.path.split(pathname)
1968 if os.path.isdir(pathname):
1969 initname = os.path.join(pathname, "__init__.py")
1970 if os.path.isfile(initname):
1971 # This is a package directory, add it
1972 if basename:
1973 basename = "%s/%s" % (basename, name)
1974 else:
1975 basename = name
1976 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001977 print("Adding package in", pathname, "as", basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001978 fname, arcname = self._get_codename(initname[0:-3], basename)
1979 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001980 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001981 self.write(fname, arcname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01001982 dirlist = sorted(os.listdir(pathname))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001983 dirlist.remove("__init__.py")
1984 # Add all *.py files and package subdirectories
1985 for filename in dirlist:
1986 path = os.path.join(pathname, filename)
1987 root, ext = os.path.splitext(filename)
1988 if os.path.isdir(path):
1989 if os.path.isfile(os.path.join(path, "__init__.py")):
1990 # This is a package directory, add it
Christian Tismer59202e52013-10-21 03:59:23 +02001991 self.writepy(path, basename,
1992 filterfunc=filterfunc) # Recursive call
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001993 elif ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02001994 if filterfunc and not filterfunc(path):
1995 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001996 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02001997 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001998 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02001999 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002000 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002001 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002002 self.write(fname, arcname)
2003 else:
2004 # This is NOT a package directory, add its files at top level
2005 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002006 print("Adding files from directory", pathname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01002007 for filename in sorted(os.listdir(pathname)):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002008 path = os.path.join(pathname, filename)
2009 root, ext = os.path.splitext(filename)
2010 if ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02002011 if filterfunc and not filterfunc(path):
2012 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03002013 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02002014 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002015 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02002016 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002017 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002018 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002019 self.write(fname, arcname)
2020 else:
2021 if pathname[-3:] != ".py":
Collin Winterce36ad82007-08-30 01:19:48 +00002022 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02002023 'Files added with writepy() must end with ".py"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002024 fname, arcname = self._get_codename(pathname[0:-3], basename)
2025 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002026 print("Adding file", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002027 self.write(fname, arcname)
2028
2029 def _get_codename(self, pathname, basename):
2030 """Return (filename, archivename) for the path.
2031
Fred Drake484d7352000-10-02 21:14:52 +00002032 Given a module name path, return the correct file path and
2033 archive name, compiling if necessary. For example, given
2034 /python/lib/string, return (/python/lib/string.pyc, string).
2035 """
Georg Brandl8334fd92010-12-04 10:26:46 +00002036 def _compile(file, optimize=-1):
2037 import py_compile
2038 if self.debug:
2039 print("Compiling", file)
2040 try:
2041 py_compile.compile(file, doraise=True, optimize=optimize)
Serhiy Storchaka45c43752013-01-29 20:10:28 +02002042 except py_compile.PyCompileError as err:
Georg Brandl8334fd92010-12-04 10:26:46 +00002043 print(err.msg)
2044 return False
2045 return True
2046
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002047 file_py = pathname + ".py"
2048 file_pyc = pathname + ".pyc"
Brett Cannonf299abd2015-04-13 14:21:02 -04002049 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2050 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2051 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
Georg Brandl8334fd92010-12-04 10:26:46 +00002052 if self._optimize == -1:
2053 # legacy mode: use whatever file is present
Brett Cannonf299abd2015-04-13 14:21:02 -04002054 if (os.path.isfile(file_pyc) and
Georg Brandl8334fd92010-12-04 10:26:46 +00002055 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2056 # Use .pyc file.
2057 arcname = fname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002058 elif (os.path.isfile(pycache_opt0) and
2059 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
Georg Brandl8334fd92010-12-04 10:26:46 +00002060 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2061 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002062 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002063 arcname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002064 elif (os.path.isfile(pycache_opt1) and
2065 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2066 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002067 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002068 fname = pycache_opt1
2069 arcname = file_pyc
2070 elif (os.path.isfile(pycache_opt2) and
2071 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2072 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2073 # file name in the archive.
2074 fname = pycache_opt2
2075 arcname = file_pyc
Barry Warsaw28a691b2010-04-17 00:19:56 +00002076 else:
Georg Brandl8334fd92010-12-04 10:26:46 +00002077 # Compile py into PEP 3147 pyc file.
2078 if _compile(file_py):
Brett Cannonf299abd2015-04-13 14:21:02 -04002079 if sys.flags.optimize == 0:
2080 fname = pycache_opt0
2081 elif sys.flags.optimize == 1:
2082 fname = pycache_opt1
2083 else:
2084 fname = pycache_opt2
2085 arcname = file_pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002086 else:
2087 fname = arcname = file_py
2088 else:
2089 # new mode: use given optimization level
2090 if self._optimize == 0:
Brett Cannonf299abd2015-04-13 14:21:02 -04002091 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002092 arcname = file_pyc
2093 else:
Brett Cannonf299abd2015-04-13 14:21:02 -04002094 arcname = file_pyc
2095 if self._optimize == 1:
2096 fname = pycache_opt1
2097 elif self._optimize == 2:
2098 fname = pycache_opt2
2099 else:
2100 msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2101 raise ValueError(msg)
Georg Brandl8334fd92010-12-04 10:26:46 +00002102 if not (os.path.isfile(fname) and
2103 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2104 if not _compile(file_py, optimize=self._optimize):
2105 fname = arcname = file_py
Barry Warsaw28a691b2010-04-17 00:19:56 +00002106 archivename = os.path.split(arcname)[1]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002107 if basename:
2108 archivename = "%s/%s" % (basename, archivename)
2109 return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002110
2111
def _unique_everseen(iterable, key=None):
    """Yield the elements of *iterable* once each, in first-seen order.

    When *key* is given, two elements count as duplicates if *key* returns
    equal values for them; the element seen first is the one yielded.
    """
    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
    # unique_everseen('ABBCcAD', str.lower) --> A B C D
    observed = set()
    for item in iterable:
        marker = item if key is None else key(item)
        if marker in observed:
            continue
        observed.add(marker)
        yield item
2128
2129
def _parents(path):
    """
    Given a path with elements separated by
    posixpath.sep, generate all parents of that path.

    The result is the output of _ancestry() with its first
    item (the path itself) skipped.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    lineage = _ancestry(path)
    return itertools.islice(lineage, 1, None)
2147
2148
def _ancestry(path):
    """
    Given a path with elements separated by
    posixpath.sep, generate all elements of that path

    The path itself comes first (without trailing separators),
    followed by each enclosing directory.  Generation stops as soon
    as nothing but separators is left.

    >>> list(_ancestry('b/d'))
    ['b/d', 'b']
    >>> list(_ancestry('/b/d/'))
    ['/b/d', '/b']
    >>> list(_ancestry('b/d/f/'))
    ['b/d/f', 'b/d', 'b']
    >>> list(_ancestry('b'))
    ['b']
    >>> list(_ancestry(''))
    []
    >>> list(_ancestry('//b'))
    ['//b']
    """
    path = path.rstrip(posixpath.sep)
    # posixpath.split() returns a head made only of separators unchanged
    # ('//' splits into ('//', '')), so comparing against a single
    # separator would loop forever on names such as '//b'.  Stripping the
    # separators before testing ends the loop for any run of them.
    while path.rstrip(posixpath.sep):
        yield path
        path, _ = posixpath.split(path)
2169
2170
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """Wrap *root* (a ZipFile, or anything ZipFile() accepts) at *at*.

        *at* is the location inside the archive; the empty string denotes
        the archive root and directory locations end with "/".
        """
        self.root = root if isinstance(root, ZipFile) else ZipFile(root)
        self.at = at

    @property
    def open(self):
        """A callable opening this member via the archive's open()."""
        return functools.partial(self.root.open, self.at)

    @property
    def name(self):
        """The final path component, without any trailing slash."""
        return posixpath.basename(self.at.rstrip("/"))

    def read_text(self, *args, **kwargs):
        """Return the member decoded as text.

        Positional and keyword arguments are passed to io.TextIOWrapper.
        """
        with self.open() as strm:
            return io.TextIOWrapper(strm, *args, **kwargs).read()

    def read_bytes(self):
        """Return the raw bytes of the member."""
        with self.open() as strm:
            return strm.read()

    def _is_child(self, path):
        # True when *path* sits directly inside this location.
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        # Another Path into the same archive.
        return Path(self.root, at)

    def is_dir(self):
        """Return True for the archive root and for locations ending in "/"."""
        return not self.at or self.at.endswith("/")

    def is_file(self):
        """Return True when the location is not a directory.

        Only the form of the location is inspected; whether such a member
        exists in the archive is not checked.
        """
        return not self.is_dir()

    def exists(self):
        """Return True if the location is a member or an implied directory."""
        return self.at in self._names()

    def iterdir(self):
        """Iterate over the direct children of this directory.

        Raises ValueError when this location is not a directory.
        """
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self._names())
        return filter(self._is_child, subs)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, add):
        """Return the Path for *add* joined below this location.

        When the joined name is not known to the archive but the same name
        with a trailing "/" is, the directory form is returned.
        """
        target = posixpath.join(self.at, add)
        target_dir = posixpath.join(self.at, add, "")
        names = self._names()
        use_dir = target not in names and target_dir in names
        return self._next(target_dir if use_dir else target)

    __truediv__ = joinpath

    @staticmethod
    def _implied_dirs(names):
        # Directories that are parents of some name but are not listed
        # themselves, each reported once in first-seen order.
        # A set makes every membership test O(1); testing against the
        # list rescanned it for each parent of each name.
        known = set(names)
        return _unique_everseen(
            parent + "/"
            for name in names
            for parent in _parents(name)
            if parent + "/" not in known
        )

    @classmethod
    def _add_implied_dirs(cls, names):
        # The given names followed by their implied directories.
        return names + list(cls._implied_dirs(names))

    @property
    def parent(self):
        """The Path of the enclosing directory (the root for top-level names)."""
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)

    def _names(self):
        # Recomputed from the archive's namelist() on every call.
        return self._add_implied_dirs(self.root.namelist())
2315
2316
def main(args=None):
    """Command-line entry point: list, extract, create or test a zipfile.

    *args* is the argument list handed to argparse; with None, argparse
    falls back to the process arguments.  Exactly one of -l, -e, -c and
    -t must be supplied.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for zipfile module.')
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument('-l', '--list', metavar='<zipfile>',
                         help='Show listing of a zipfile')
    actions.add_argument('-e', '--extract', nargs=2,
                         metavar=('<zipfile>', '<output_dir>'),
                         help='Extract zipfile into target dir')
    actions.add_argument('-c', '--create', nargs='+',
                         metavar=('<name>', '<file>'),
                         help='Create zipfile from sources')
    actions.add_argument('-t', '--test', metavar='<zipfile>',
                         help='Test if a zipfile is valid')
    args = parser.parse_args(args)

    if args.test is not None:
        with ZipFile(args.test, 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")
        return

    if args.list is not None:
        with ZipFile(args.list, 'r') as zf:
            zf.printdir()
        return

    if args.extract is not None:
        archive, target_dir = args.extract
        with ZipFile(archive, 'r') as zf:
            zf.extractall(target_dir)
        return

    if args.create is not None:
        # First value is the archive to create, the rest are its sources.
        zip_name, *sources = args.create

        def add_tree(zf, path, zippath):
            # Regular files are deflated; directories are stored as an
            # entry (unless they map to the archive root) and then walked
            # in sorted order.  Anything else is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
                return
            if not os.path.isdir(path):
                return
            if zippath:
                zf.write(path, zippath)
            for entry in sorted(os.listdir(path)):
                add_tree(zf,
                         os.path.join(path, entry),
                         os.path.join(zippath, entry))

        with ZipFile(zip_name, 'w') as zf:
            for source in sources:
                zippath = os.path.basename(source)
                if not zippath:
                    # Trailing separator: take the directory's own name.
                    zippath = os.path.basename(os.path.dirname(source))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                add_tree(zf, source, zippath)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002376
if __name__ == "__main__":
    # Run the command-line interface when executed as a script.
    main()