blob: 8903d6a42ee4ebba3c5b6847689bf9ca05d1c3af [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04006import binascii
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04007import importlib.util
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import io
shireenraoa4e29912019-08-24 11:26:41 -04009import itertools
Barry Warsaw28a691b2010-04-17 00:19:56 +000010import os
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040011import posixpath
Barry Warsaw28a691b2010-04-17 00:19:56 +000012import shutil
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040013import stat
Barry Warsaw28a691b2010-04-17 00:19:56 +000014import struct
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040015import sys
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020016import threading
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040017import time
Jason R. Coombse5bd7362020-02-11 21:58:47 -050018import contextlib
Guido van Rossum32abe6f2000-03-31 17:30:02 +000019
20try:
Tim Peterse1190062001-01-15 03:34:38 +000021 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000022 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040023except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000025 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000026
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020027try:
28 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040029except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020030 bz2 = None
31
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020032try:
33 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040034except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020035 lzma = None
36
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020037__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020038 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Zackery Spytz9a81ab12020-03-23 07:29:36 -060039 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
40 "Path"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000041
class BadZipFile(Exception):
    """Raised by this module for malformed or unsupported ZIP archive data."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +000044
45
class LargeZipFile(Exception):
    """Raised while writing an archive that needs ZIP64 extensions
    when those extensions have not been enabled.
    """
51
Georg Brandl4d540882010-10-28 06:42:33 +000052error = BadZipfile = BadZipFile # Pre-3.2 compatibility names
53
Guido van Rossum32abe6f2000-03-31 17:30:02 +000054
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +000055ZIP64_LIMIT = (1 << 31) - 1
Serhiy Storchakacfbb3942014-09-23 21:34:24 +030056ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +000057ZIP_MAX_COMMENT = (1 << 16) - 1
Thomas Wouters0e3f5912006-08-11 14:57:12 +000058
Guido van Rossum32abe6f2000-03-31 17:30:02 +000059# constants for Zip file compression methods
60ZIP_STORED = 0
61ZIP_DEFLATED = 8
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020062ZIP_BZIP2 = 12
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020063ZIP_LZMA = 14
Guido van Rossum32abe6f2000-03-31 17:30:02 +000064# Other ZIP compression methods not supported
65
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020066DEFAULT_VERSION = 20
67ZIP64_VERSION = 45
68BZIP2_VERSION = 46
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020069LZMA_VERSION = 63
Martin v. Löwisd099b562012-05-01 14:08:22 +020070# we recognize (but not necessarily support) all features up to that version
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020071MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020072
Martin v. Löwisb09b8442008-07-03 14:13:42 +000073# Below are some formats and associated data for reading/writing headers using
74# the struct module. The names and structures of headers/records are those used
75# in the PKWARE description of the ZIP file format:
76# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
77# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000078
Martin v. Löwisb09b8442008-07-03 14:13:42 +000079# The "end of central directory" structure, magic number, size, and indices
80# (section V.I in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +000081structEndArchive = b"<4s4H2LH"
82stringEndArchive = b"PK\005\006"
83sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +000084
85_ECD_SIGNATURE = 0
86_ECD_DISK_NUMBER = 1
87_ECD_DISK_START = 2
88_ECD_ENTRIES_THIS_DISK = 3
89_ECD_ENTRIES_TOTAL = 4
90_ECD_SIZE = 5
91_ECD_OFFSET = 6
92_ECD_COMMENT_SIZE = 7
93# These last two indices are not part of the structure as defined in the
94# spec, but they are used internally by this module as a convenience
95_ECD_COMMENT = 8
96_ECD_LOCATION = 9
97
98# The "central directory" structure, magic number, size, and indices
99# of entries in the structure (section V.F in the format document)
100structCentralDir = "<4s4B4HL2L5H2L"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000101stringCentralDir = b"PK\001\002"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000102sizeCentralDir = struct.calcsize(structCentralDir)
103
Fred Drake3e038e52001-02-28 17:56:26 +0000104# indexes of entries in the central directory structure
105_CD_SIGNATURE = 0
106_CD_CREATE_VERSION = 1
107_CD_CREATE_SYSTEM = 2
108_CD_EXTRACT_VERSION = 3
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000109_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +0000110_CD_FLAG_BITS = 5
111_CD_COMPRESS_TYPE = 6
112_CD_TIME = 7
113_CD_DATE = 8
114_CD_CRC = 9
115_CD_COMPRESSED_SIZE = 10
116_CD_UNCOMPRESSED_SIZE = 11
117_CD_FILENAME_LENGTH = 12
118_CD_EXTRA_FIELD_LENGTH = 13
119_CD_COMMENT_LENGTH = 14
120_CD_DISK_NUMBER_START = 15
121_CD_INTERNAL_FILE_ATTRIBUTES = 16
122_CD_EXTERNAL_FILE_ATTRIBUTES = 17
123_CD_LOCAL_HEADER_OFFSET = 18
124
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000125# The "local file header" structure, magic number, size, and indices
126# (section V.A in the format document)
127structFileHeader = "<4s2B4HL2L2H"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000128stringFileHeader = b"PK\003\004"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000129sizeFileHeader = struct.calcsize(structFileHeader)
130
Fred Drake3e038e52001-02-28 17:56:26 +0000131_FH_SIGNATURE = 0
132_FH_EXTRACT_VERSION = 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000133_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000134_FH_GENERAL_PURPOSE_FLAG_BITS = 3
135_FH_COMPRESSION_METHOD = 4
136_FH_LAST_MOD_TIME = 5
137_FH_LAST_MOD_DATE = 6
138_FH_CRC = 7
139_FH_COMPRESSED_SIZE = 8
140_FH_UNCOMPRESSED_SIZE = 9
141_FH_FILENAME_LENGTH = 10
142_FH_EXTRA_FIELD_LENGTH = 11
143
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000144# The "Zip64 end of central directory locator" structure, magic number, and size
Georg Brandl2ee470f2008-07-16 12:55:28 +0000145structEndArchive64Locator = "<4sLQL"
146stringEndArchive64Locator = b"PK\x06\x07"
147sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000148
149# The "Zip64 end of central directory" record, magic number, size, and indices
150# (section V.G in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000151structEndArchive64 = "<4sQ2H2L4Q"
152stringEndArchive64 = b"PK\x06\x06"
153sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000154
155_CD64_SIGNATURE = 0
156_CD64_DIRECTORY_RECSIZE = 1
157_CD64_CREATE_VERSION = 2
158_CD64_EXTRACT_VERSION = 3
159_CD64_DISK_NUMBER = 4
160_CD64_DISK_NUMBER_START = 5
161_CD64_NUMBER_ENTRIES_THIS_DISK = 6
162_CD64_NUMBER_ENTRIES_TOTAL = 7
163_CD64_DIRECTORY_SIZE = 8
164_CD64_OFFSET_START_CENTDIR = 9
165
Silas Sewell4ba3b502018-09-18 13:00:05 -0400166_DD_SIGNATURE = 0x08074b50
167
Serhiy Storchaka9bdb7be2018-09-17 15:36:40 +0300168_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
169
170def _strip_extra(extra, xids):
171 # Remove Extra Fields with specified IDs.
172 unpack = _EXTRA_FIELD_STRUCT.unpack
173 modified = False
174 buffer = []
175 start = i = 0
176 while i + 4 <= len(extra):
177 xid, xlen = unpack(extra[i : i + 4])
178 j = i + 4 + xlen
179 if xid in xids:
180 if i != start:
181 buffer.append(extra[start : i])
182 start = j
183 modified = True
184 i = j
185 if not modified:
186 return extra
187 return b''.join(buffer)
188
def _check_zipfile(fp):
    """Return True if *fp* yields an "end of central directory" record.

    *fp* is an open file object handed to _EndRecData().  An OSError
    raised while probing the file is treated as "not a ZIP file".
    """
    try:
        endrec = _EndRecData(fp)
    except OSError:
        return False
    # A truthy record means the end-of-archive magic number was found.
    return bool(endrec)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000196
def is_zipfile(filename):
    """Quickly decide whether *filename* is a ZIP file via its magic number.

    *filename* may be a path, or any object with a ``read`` attribute, in
    which case it is probed directly and left open.  Returns False when the
    file cannot be opened or read (OSError).
    """
    is_zip = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as stream:
                is_zip = _check_zipfile(stream)
        else:
            is_zip = _check_zipfile(fp=filename)
    except OSError:
        pass
    return is_zip
212
def _EndRecData64(fpin, offset, endrec):
    """Overlay ZIP64 end-of-archive data onto *endrec* when it is present.

    *offset* is the position of the classic "end of central directory"
    record relative to the end of the file (so it is negative).  The ZIP64
    locator is expected directly before that record and the ZIP64 record
    directly before the locator.  If either structure is missing, truncated
    or has the wrong signature, *endrec* is returned unmodified.

    Raises BadZipFile when the locator describes a multi-disk archive.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # The file is too small to hold a ZIP64 locator: keep what we have.
        return endrec

    raw = fpin.read(sizeEndCentDir64Locator)
    if len(raw) != sizeEndCentDir64Locator:
        return endrec
    magic, first_disk, _rel_offset, disk_total = struct.unpack(
        structEndArchive64Locator, raw)
    if magic != stringEndArchive64Locator:
        return endrec

    if first_disk != 0 or disk_total > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # The record is assumed to carry no 'zip64 extensible data', so it sits
    # immediately in front of the locator.
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    raw = fpin.read(sizeEndCentDir64)
    if len(raw) != sizeEndCentDir64:
        return endrec
    (magic, _record_size, _made_by, _needed_version, disk_num, disk_dir,
     entries_here, entries_total, cd_size, cd_offset) = struct.unpack(
        structEndArchive64, raw)
    if magic != stringEndArchive64:
        return endrec

    # Replace the 32-bit fields with their ZIP64 counterparts.
    for slot, value in (
        (_ECD_SIGNATURE, magic),
        (_ECD_DISK_NUMBER, disk_num),
        (_ECD_DISK_START, disk_dir),
        (_ECD_ENTRIES_THIS_DISK, entries_here),
        (_ECD_ENTRIES_TOTAL, entries_total),
        (_ECD_SIZE, cd_size),
        (_ECD_OFFSET, cd_offset),
    ):
        endrec[slot] = value
    return endrec
254
255
def _EndRecData(fpin):
    """Locate and decode the "End of Central Directory" record of *fpin*.

    Returns None when no usable record is found.  Otherwise returns a list:
    the unpacked fields of the record, then the archive comment (bytes),
    then the absolute file offset at which the record starts.  The list is
    passed through _EndRecData64() so ZIP64 values replace the classic ones
    when a ZIP64 record exists.
    """
    # Seek to the end to learn the file size.
    fpin.seek(0, 2)
    total_size = fpin.tell()

    # Fast path: an archive without a comment ends with the record itself.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    has_plain_record = (len(tail) == sizeEndCentDir and
                        tail[0:4] == stringEndArchive and
                        tail[-2:] == b"\000\000")
    if has_plain_record:
        # Signature matches and the comment length is zero.
        fields = list(struct.unpack(structEndArchive, tail))
        fields.append(b"")                              # empty comment
        fields.append(total_size - sizeEndCentDir)      # record offset
        return _EndRecData64(fpin, -sizeEndCentDir, fields)

    # Slow path: either not a ZIP file, or the record is followed by a
    # comment of up to 64K.  Scan that trailing window for the last
    # occurrence of the signature; the signature is assumed not to occur
    # inside the comment itself.
    window_start = max(total_size - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(window_start, 0)
    window = fpin.read()
    hit = window.rfind(stringEndArchive)
    if hit < 0:
        # No end-of-central-directory signature anywhere in the window.
        return None

    record_end = hit + sizeEndCentDir
    record = window[hit:record_end]
    if len(record) != sizeEndCentDir:
        # Signature found but the record is cut short: corrupted file.
        return None
    fields = list(struct.unpack(structEndArchive, record))
    claimed_len = fields[_ECD_COMMENT_SIZE]     # as stated by the archive
    fields.append(window[record_end:record_end + claimed_len])
    fields.append(window_start + hit)

    # Try to read the "Zip64 end of central directory" structure.
    return _EndRecData64(fpin, window_start + hit - total_size, fields)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000315
Fred Drake484d7352000-10-02 21:14:52 +0000316
class ZipInfo:
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Initialise the entry with a name, a timestamp and default values.

        *filename* is stored verbatim in ``orig_filename``; ``filename``
        holds the normalised form (cut at the first NUL, OS separator
        replaced by '/').  *date_time* is a (year, month, day, hour, min,
        sec) sequence.

        Raises ValueError if the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        self.compress_size = 0          # Size of the compressed file
        self.file_size = 0              # Size of the uncompressed file
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file

    def __repr__(self):
        """Return a compact description that omits default-valued details."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The high 16 bits of external_attr are shown as a file mode,
        # the low 16 bits as a raw hexadecimal value.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        *zip64* forces (True) or forbids (False) the ZIP64 extra record;
        with None it is added only when a size exceeds ZIP64_LIMIT.  As a
        side effect ``extract_version`` and ``create_version`` are raised
        to the minimum the entry requires.

        Raises LargeZipFile if a size exceeds ZIP64_LIMIT while *zip64*
        is false.
        """
        dt = self.date_time
        # Pack the timestamp into 16-bit date and time words; seconds are
        # stored halved.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4,
                                        file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return ``(encoded_filename, flag_bits)`` for writing a header.

        ASCII names are encoded as such with the flags unchanged; any other
        name is encoded as UTF-8 and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 record (ID 0x0001) found in ``self.extra``.

        Each of ``file_size``, ``compress_size`` and ``header_offset`` that
        currently holds a placeholder value is replaced, in that order, by
        the next 64-bit value of the record.  ``header_offset`` must already
        be set by the caller.

        Raises BadZipFile if a record overruns the extra data or the ZIP64
        record lacks a required value.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                data = extra[4:ln+4]
                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF):
                        field = "File size"
                        self.file_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.compress_size == 0xFFFF_FFFF:
                        field = "Compress size"
                        self.compress_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.header_offset == 0xFFFF_FFFF:
                        field = "Header offset"
                        self.header_offset, = unpack('<Q', data[:8])
                except struct.error:
                    raise BadZipFile(f"Corrupt zip64 extra field. "
                                     f"{field} not found.") from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).

        When strict_timestamps is false, a modification year before 1980 or
        after 2107 is clamped to the nearest representable timestamp instead
        of being passed through unchanged.
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        # endswith() rather than filename[-1]: an empty name must answer
        # False instead of raising IndexError.
        return self.filename.endswith('/')
530
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000531
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300532# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
533# internal keys. We noticed that a direct implementation is faster than
534# relying on binascii.crc32().
Thomas Wouterscf297e42007-02-23 15:07:44 +0000535
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300536_crctable = None
537def _gen_crc(crc):
538 for j in range(8):
539 if crc & 1:
540 crc = (crc >> 1) ^ 0xEDB88320
541 else:
542 crc >>= 1
543 return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000544
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300545# ZIP supports a password-based form of encryption. Even though known
546# plaintext attacks have been found against it, it is still useful
547# to be able to get data out of such a file.
548#
549# Usage:
550# zd = _ZipDecrypter(mypwd)
551# plain_bytes = zd(cypher_bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000552
def _ZipDecrypter(pwd):
    """Return a function that decrypts bytes of a password-protected member.

    *pwd* is iterated byte by byte (integers) to seed three 32-bit keys.
    The returned callable takes a bytes-like object and returns the
    decrypted bytes; it keeps state between calls, so data must be fed in
    stream order.
    """
    k0 = 0x12345678
    k1 = 0x23456789
    k2 = 0x34567890

    # Build the shared CRC table on first use.
    global _crctable
    if _crctable is None:
        _crctable = [_gen_crc(n) for n in range(256)]
    table = _crctable

    def advance(byte):
        """Mix one plaintext byte into the three keys."""
        nonlocal k0, k1, k2
        k0 = (k0 >> 8) ^ table[(k0 ^ byte) & 0xFF]
        k1 = ((k1 + (k0 & 0xFF)) * 134775813 + 1) & 0xFFFFFFFF
        k2 = (k2 >> 8) ^ table[(k2 ^ (k1 >> 24)) & 0xFF]

    for pwd_byte in pwd:
        advance(pwd_byte)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        for byte in data:
            temp = k2 | 2
            byte ^= ((temp * (temp ^ 1)) >> 8) & 0xFF
            # The keys are updated with the decrypted byte.
            advance(byte)
            plain.append(byte)
        return bytes(plain)

    return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000589
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200590
class LZMACompressor:
    """Raw LZMA1 compressor that prefixes its output with a small header.

    The header (bytes 9, 4, a 16-bit little-endian properties length, then
    the encoded filter properties) is emitted once, together with the first
    output produced by compress() or flush().
    """

    def __init__(self):
        # The underlying compressor is created lazily by _init().
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        prefix = struct.pack('<BBH', 9, 4, len(props))
        return prefix + props

    def compress(self, data):
        """Compress *data*, preceded by the header on the first call."""
        header = b'' if self._comp is not None else self._init()
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream, emitting the header first if nothing was written."""
        header = b'' if self._comp is not None else self._init()
        return header + self._comp.flush()
612
613
class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by a properties header.

    Input is buffered until the 4-byte prefix and the filter properties it
    announces have arrived, plus at least one byte of payload; only then is
    the underlying lzma decompressor created.
    """

    def __init__(self):
        self._decomp = None         # created once the header is complete
        self._unconsumed = b''      # header bytes gathered so far
        self.eof = False

    def decompress(self, data):
        """Feed *data*; return the decompressed bytes available so far."""
        if self._decomp is None:
            pending = self._unconsumed + data
            self._unconsumed = pending
            if len(pending) <= 4:
                return b''
            # Bytes 2-3 hold the little-endian length of the properties.
            (props_len,) = struct.unpack('<H', pending[2:4])
            body_start = 4 + props_len
            if len(pending) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[body_start:]
            # The header buffer is no longer needed.
            del self._unconsumed

        output = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return output
640
641
642compressor_names = {
643 0: 'store',
644 1: 'shrink',
645 2: 'reduce',
646 3: 'reduce',
647 4: 'reduce',
648 5: 'reduce',
649 6: 'implode',
650 7: 'tokenize',
651 8: 'deflate',
652 9: 'deflate64',
653 10: 'implode',
654 12: 'bzip2',
655 14: 'lzma',
656 18: 'terse',
657 19: 'lz77',
658 97: 'wavpack',
659 98: 'ppmd',
660}
661
def _check_compression(compression):
    """Validate that *compression* can be handled in this interpreter.

    Returns None for a supported method.  Raises RuntimeError when the
    method is known but its module (zlib, bz2 or lzma) could not be
    imported, and NotImplementedError for any other method.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if zlib:
            return
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if compression == ZIP_BZIP2:
        if bz2:
            return
        raise RuntimeError(
            "Compression requires the (missing) bz2 module")
    if compression == ZIP_LZMA:
        if lzma:
            return
        raise RuntimeError(
            "Compression requires the (missing) lzma module")
    raise NotImplementedError("That compression method is not supported")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200679
680
def _get_compressor(compress_type, compresslevel=None):
    """Return a fresh compressor object for *compress_type*.

    *compresslevel* is forwarded to the zlib and bz2 compressors when it
    is not None.  Returns None for any method other than ZIP_DEFLATED,
    ZIP_BZIP2 and ZIP_LZMA.
    """
    if compress_type == ZIP_DEFLATED:
        if compresslevel is None:
            level = zlib.Z_DEFAULT_COMPRESSION
        else:
            level = compresslevel
        # Negative wbits: raw deflate stream without zlib header/trailer.
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()
    return None
695
696
def _get_decompressor(compress_type):
    """Return a fresh decompressor object for *compress_type*.

    Returns None for ZIP_STORED, since stored data needs no decompression.

    Raises RuntimeError (from _check_compression) when the method is
    supported but the module implementing it could not be imported, and
    NotImplementedError naming the method for any unsupported value.
    """
    if compress_type in (ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2, ZIP_LZMA):
        # Only supported methods are delegated: _check_compression() raises
        # a generic NotImplementedError for everything else, which used to
        # make the descriptive error below unreachable.
        _check_compression(compress_type)
    else:
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError(
                "compression type %d (%s)" % (compress_type, descr))
        raise NotImplementedError("compression type %d" % (compress_type,))

    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # Negative wbits: raw deflate stream without zlib header/trailer.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    return LZMADecompressor()
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200713
714
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200715class _SharedFile:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300716 def __init__(self, file, pos, close, lock, writing):
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200717 self._file = file
718 self._pos = pos
719 self._close = close
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200720 self._lock = lock
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300721 self._writing = writing
John Jolly066df4f2018-01-30 01:51:35 -0700722 self.seekable = file.seekable
723 self.tell = file.tell
724
725 def seek(self, offset, whence=0):
726 with self._lock:
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200727 if self._writing():
John Jolly066df4f2018-01-30 01:51:35 -0700728 raise ValueError("Can't reposition in the ZIP file while "
729 "there is an open writing handle on it. "
730 "Close the writing handle before trying to read.")
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200731 self._file.seek(offset, whence)
John Jolly066df4f2018-01-30 01:51:35 -0700732 self._pos = self._file.tell()
733 return self._pos
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200734
735 def read(self, n=-1):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200736 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300737 if self._writing():
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300738 raise ValueError("Can't read from the ZIP file while there "
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300739 "is an open writing handle on it. "
740 "Close the writing handle before trying to read.")
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200741 self._file.seek(self._pos)
742 data = self._file.read(n)
743 self._pos = self._file.tell()
744 return data
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200745
746 def close(self):
747 if self._file is not None:
748 fileobj = self._file
749 self._file = None
750 self._close(fileobj)
751
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200752# Provide the tell method for unseekable stream
753class _Tellable:
754 def __init__(self, fp):
755 self.fp = fp
756 self.offset = 0
757
758 def write(self, data):
759 n = self.fp.write(data)
760 self.offset += n
761 return n
762
763 def tell(self):
764 return self.offset
765
766 def flush(self):
767 self.fp.flush()
768
769 def close(self):
770 self.fp.close()
771
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200772
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # Parenthesised on purpose: "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, pwd=None,
                 close_fileobj=False):
        """Prepare to read the member described by *zipinfo* from *fileobj*.

        fileobj:       file object positioned at the start of the member's
                       (possibly encrypted) compressed data.
        mode:          stored as the ``mode`` attribute.
        zipinfo:       supplies compress_type, compress_size, file_size,
                       filename and, if present, CRC.
        pwd:           password; when true the 12-byte encryption header
                       is read and checked immediately.
        close_fileobj: if true, close() also closes *fileobj*.

        Raises RuntimeError if *pwd* does not match the member's check byte.
        """
        self._fileobj = fileobj
        self._pwd = pwd
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size  # compressed bytes unread
        self._left = zipinfo.file_size               # plain bytes not yet produced

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0    # read position inside _readbuffer

        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        self._seekable = False
        try:
            if fileobj.seekable():
                # Remember the initial state so seek() can rewind to it.
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                # NOTE: _running_crc is only set when zipinfo has a CRC;
                # without one this raises AttributeError and the handler
                # below leaves the file marked as not seekable.
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass

        self._decrypter = None
        if pwd:
            if zipinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zipinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zipinfo.CRC >> 24) & 0xff
            h = self._init_decrypter()
            if h != check_byte:
                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)

    def _init_decrypter(self):
        """Create the decrypter, consume the encryption header and return
        its decrypted 12th byte (the password check byte)."""
        self._decrypter = _ZipDecrypter(self._pwd)
        # The first 12 bytes in the cypher stream is an encryption header
        #  used to strengthen the algorithm. The first 11 bytes are
        #  completely random, while the 12th contains the MSB of the CRC,
        #  or the MSB of the file time depending on the header type
        #  and is used to check the correctness of the password.
        header = self._fileobj.read(12)
        self._compress_left -= 12
        return self._decrypter(header)[11]

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Put the bytes just read back in front of the buffer so the
            # position is unchanged.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True; raise ValueError if the file is closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if self.closed:
            raise ValueError("read from closed file.")
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Request can be served entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        # Drain the buffer, then read the remainder from the stream.
        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than requested: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        """Fold *newdata* into the running CRC.

        Raises BadZipFile if end of file has been reached and the CRC
        does not match the expected value.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n=-1):
        """Read up to n bytes with at most one read() system call.

        If n is omitted, None, or negative, return the buffered bytes plus
        the result of the first underlying read that yields data.
        """

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            # Loop only until one underlying read produces some data.
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        """Read and decrypt up to max(n, MIN_READ_SIZE) compressed bytes,
        capped at the number of compressed bytes left.

        Raises EOFError if the underlying file returns no data while
        compressed bytes are still expected.
        """
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        """Close this file, and the underlying file object if requested."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        """Return whether seek() and tell() are supported."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return self._seekable

    def seek(self, offset, whence=0):
        """Move to a position in the uncompressed data and return it.

        The target is clamped to the range [0, file size].  Seeking to a
        position outside the current buffer is done by reading and
        discarding data; seeking backwards rewinds to the start of the
        member first.

        Raises ValueError if the file is closed or *whence* is invalid,
        and io.UnsupportedOperation if the underlying stream is not
        seekable.
        """
        if self.closed:
            raise ValueError("seek on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        # Skip forward by reading and discarding, in bounded chunks.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        """Return the current position in the uncompressed data.

        Raises ValueError if the file is closed and
        io.UnsupportedOperation if the underlying stream is not seekable.
        """
        if self.closed:
            raise ValueError("tell on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        # Bytes produced so far, minus what is still waiting in the buffer.
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos
1100
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001101
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001102class _ZipWriteFile(io.BufferedIOBase):
1103 def __init__(self, zf, zinfo, zip64):
1104 self._zinfo = zinfo
1105 self._zip64 = zip64
1106 self._zipfile = zf
Bo Baylesce237c72018-01-29 23:54:07 -06001107 self._compressor = _get_compressor(zinfo.compress_type,
1108 zinfo._compresslevel)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001109 self._file_size = 0
1110 self._compress_size = 0
1111 self._crc = 0
1112
1113 @property
1114 def _fileobj(self):
1115 return self._zipfile.fp
1116
1117 def writable(self):
1118 return True
1119
1120 def write(self, data):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001121 if self.closed:
1122 raise ValueError('I/O operation on closed file.')
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001123 nbytes = len(data)
1124 self._file_size += nbytes
1125 self._crc = crc32(data, self._crc)
1126 if self._compressor:
1127 data = self._compressor.compress(data)
1128 self._compress_size += len(data)
1129 self._fileobj.write(data)
1130 return nbytes
1131
1132 def close(self):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001133 if self.closed:
1134 return
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001135 try:
1136 super().close()
1137 # Flush any data from the compressor, and update header info
1138 if self._compressor:
1139 buf = self._compressor.flush()
1140 self._compress_size += len(buf)
1141 self._fileobj.write(buf)
1142 self._zinfo.compress_size = self._compress_size
1143 else:
1144 self._zinfo.compress_size = self._file_size
1145 self._zinfo.CRC = self._crc
1146 self._zinfo.file_size = self._file_size
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001147
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001148 # Write updated header info
1149 if self._zinfo.flag_bits & 0x08:
1150 # Write CRC and file sizes after the file data
1151 fmt = '<LLQQ' if self._zip64 else '<LLLL'
1152 self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
1153 self._zinfo.compress_size, self._zinfo.file_size))
1154 self._zipfile.start_dir = self._fileobj.tell()
1155 else:
1156 if not self._zip64:
1157 if self._file_size > ZIP64_LIMIT:
1158 raise RuntimeError(
1159 'File size unexpectedly exceeded ZIP64 limit')
1160 if self._compress_size > ZIP64_LIMIT:
1161 raise RuntimeError(
1162 'Compressed size unexpectedly exceeded ZIP64 limit')
1163 # Seek backwards and write file header (which will now include
1164 # correct CRC and file sizes)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001165
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001166 # Preserve current position in file
1167 self._zipfile.start_dir = self._fileobj.tell()
1168 self._fileobj.seek(self._zinfo.header_offset)
1169 self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1170 self._fileobj.seek(self._zipfile.start_dir)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001171
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001172 # Successfully written: Add file to our caches
1173 self._zipfile.filelist.append(self._zinfo)
1174 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1175 finally:
1176 self._zipfile._writing = False
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001177
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001178
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001179
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001180class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001181 """ Class with methods to open, read, write, close, list zip files.
1182
Bo Baylesce237c72018-01-29 23:54:07 -06001183 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1184 compresslevel=None)
Tim Petersa19a1682001-03-29 04:36:09 +00001185
Fred Drake3d9091e2001-03-26 15:49:24 +00001186 file: Either the path to the file, or a file-like object.
1187 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001188 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1189 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001190 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1191 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001192 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1193 needed, otherwise it will raise an exception when this would
1194 be necessary.
Bo Baylesce237c72018-01-29 23:54:07 -06001195 compresslevel: None (default for the given compression type) or an integer
1196 specifying the level to pass to the compressor.
1197 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1198 When using ZIP_DEFLATED integers 0 through 9 are accepted.
1199 When using ZIP_BZIP2 integers 1 through 9 are accepted.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001200
Fred Drake3d9091e2001-03-26 15:49:24 +00001201 """
Fred Drake484d7352000-10-02 21:14:52 +00001202
Fred Drake90eac282001-02-28 05:29:34 +00001203 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001204 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001205
    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None, *, strict_timestamps=True):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'.

        *file* may be a str or os.PathLike path, which is opened here, or
        any other object, which is used directly as the file object.

        Raises ValueError for an unknown mode, and whatever
        _check_compression() raises for an unusable compression method.
        If setting up the archive fails after the file object has been
        obtained, the file object is passed to _fpclose() and the
        exception is re-raised.
        """
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''
        self._strict_timestamps = strict_timestamps

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            # Maps the ZipFile mode to the first open() mode to try, and
            # each open() mode to the fallback tried if it raises OSError.
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    if filemode in modeDict:
                        # Retry with the next fallback mode.
                        filemode = modeDict[filemode]
                        continue
                    # No fallback left: propagate the error.
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    # No usable tell(): wrap the stream so that positions
                    # can still be tracked, and treat it as unseekable.
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            # Release the file object on any failure, then re-raise.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001295
    def __enter__(self):
        """Enter a ``with`` block; the ZipFile itself is the bound value."""
        return self
1298
    def __exit__(self, type, value, traceback):
        """Leave a ``with`` block by calling close(); the exception
        arguments are not inspected."""
        self.close()
1301
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001302 def __repr__(self):
1303 result = ['<%s.%s' % (self.__class__.__module__,
1304 self.__class__.__qualname__)]
1305 if self.fp is not None:
1306 if self._filePassed:
1307 result.append(' file=%r' % self.fp)
1308 elif self.filename is not None:
1309 result.append(' filename=%r' % self.filename)
1310 result.append(' mode=%r' % self.mode)
1311 else:
1312 result.append(' [closed]')
1313 result.append('>')
1314 return ''.join(result)
1315
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record, loads the whole central
    directory into memory and appends one ZipInfo per entry to
    self.filelist and self.NameToInfo.  Also sets self._comment and
    self.start_dir.

    Raises BadZipFile when the end record cannot be read or found, when
    the computed central directory offset is negative, or when an entry
    is truncated or carries a bad signature.  Raises NotImplementedError
    when an entry requires an extractor newer than MAX_EXTRACT_VERSION.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir: Position of start of central directory
    self.start_dir = offset_cd + concat
    if self.start_dir < 0:
        # Inconsistent sizes in the end record; report it as a corrupt
        # archive rather than letting seek() fail with an unrelated error.
        raise BadZipFile("Bad offset for central directory")
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # From here on "fp" is an in-memory copy of the central directory.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if len(centdir) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        x._decodeExtra()
        # Shift the stored offset by any data prepended to the archive.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001392
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001393
def namelist(self):
    """Return the member file names, in the order of self.filelist."""
    return [zinfo.filename for zinfo in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001397
def infolist(self):
    """Return the list of ZipInfo instances for files in the archive.

    The returned object is self.filelist itself, not a copy.
    """
    members = self.filelist
    return members
1402
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header row is printed, then one row per member with its name,
    modification time and uncompressed size.  'file' is forwarded to
    print() unchanged.
    """
    header = ("File Name", "Modified ", "Size")
    print("%-46s %19s %12s" % header, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = (entry.filename, stamp, entry.file_size)
        print("%-46s %s %12d" % row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001411
def testzip(self):
    """Read all the files and check the CRC.

    Returns the name of the first member whose read raises BadZipFile,
    or None when every member reads cleanly.
    """
    block = 2 ** 20
    for member in self.filelist:
        try:
            # Read by chunks, to avoid an OverflowError or a
            # MemoryError with very large embedded files.
            with self.open(member.filename, "r") as stream:
                piece = stream.read(block)     # Check CRC-32
                while piece:
                    piece = stream.read(block)
        except BadZipFile:
            return member.filename
    return None
1424
def getinfo(self, name):
    """Return the ZipInfo registered under 'name'.

    Raises KeyError when self.NameToInfo has no (non-None) entry for it.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001433
def setpassword(self, pwd):
    """Set default password for encrypted files.

    Any falsy value (None, b"") clears the default.  A truthy value
    must be bytes, otherwise TypeError is raised.
    """
    if not pwd:
        self.pwd = None
        return
    if not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd
Thomas Wouterscf297e42007-02-23 15:07:44 +00001442
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Only bytes are accepted; anything else is a caller error.
    if not isinstance(comment, bytes):
        kind = type(comment).__name__
        raise TypeError("comment: expected bytes, got %s" % kind)
    limit = ZIP_MAX_COMMENT
    if len(comment) > limit:
        # Over-long comments are truncated with a warning, not rejected.
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % limit, stacklevel=2)
        comment = comment[:limit]
    self._comment = comment
    # Mark the archive as modified.
    self._didModify = True
1460
def read(self, name, pwd=None):
    """Return file bytes for name.

    The member is opened with self.open(name, "r", pwd), read in full
    and closed again before the data is returned.
    """
    with self.open(name, "r", pwd) as member:
        payload = member.read()
    return payload
Guido van Rossumd8faa362007-04-27 19:54:29 +00001465
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Return file-like object for 'name'.

    name is a string for the file name within the ZIP file, or a ZipInfo
    object.

    mode should be 'r' to read a file already in the ZIP file, or 'w' to
    write to a file newly added to the archive.

    pwd is the password to decrypt files (only used for reading).

    When writing, if the file size is not known in advance but may exceed
    2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
    files.  If the size is known in advance, it is best to pass a ZipInfo
    instance for name, with zinfo.file_size set.
    """
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    for_writing = (mode == "w")
    if pwd:
        if not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if for_writing:
            raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    # Resolve 'name' to an info object.
    if isinstance(name, ZipInfo):
        zinfo = name
    elif for_writing:
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
    else:
        zinfo = self.getinfo(name)

    if for_writing:
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                         "is an open writing handle on it. "
                         "Close the writing handle before trying to read.")

    # Open for reading: take a reference on the underlying file.
    self._fileRefCnt += 1
    zef_file = _SharedFile(self.fp, zinfo.header_offset,
                           self._fpclose, self._lock, lambda: self._writing)
    try:
        # Read and validate the local file header, then skip past it.
        raw_header = zef_file.read(sizeFileHeader)
        if len(raw_header) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        header = struct.unpack(structFileHeader, raw_header)
        if header[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fname = zef_file.read(header[_FH_FILENAME_LENGTH])
        extra_len = header[_FH_EXTRA_FIELD_LENGTH]
        if extra_len:
            zef_file.read(extra_len)

        member_flags = zinfo.flag_bits
        if member_flags & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if member_flags & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        # Flag bit 11 selects UTF-8; otherwise the historical cp437.
        codec = "utf-8" if member_flags & 0x800 else "cp437"
        fname_str = fname.decode(codec)

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        if member_flags & 0x1:
            pwd = pwd or self.pwd
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)
        else:
            pwd = None

        return ZipExtFile(zef_file, mode, zinfo, pwd, True)
    except:
        # Release the shared handle on any failure, then re-raise.
        zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001563
def _open_to_write(self, zinfo, force_zip64=False):
    """Write the local header for 'zinfo' and return a write handle.

    Raises ValueError when force_zip64 is requested on an archive opened
    with allowZip64 false, or when another write handle is still open.
    Sets self._writing before returning the handle.
    """
    if force_zip64 and not self._allowZip64:
        raise ValueError(
            "force_zip64 is True, but allowZip64 was False when opening "
            "the ZIP file."
        )
    if self._writing:
        raise ValueError("Can't write to the ZIP file while there is "
                         "another write handle open on it. "
                         "Close the first handle before opening another.")

    # Size and CRC are overwritten with correct data after processing the file
    zinfo.compress_size = 0
    zinfo.CRC = 0

    flags = 0x00
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        flags |= 0x02
    if not self._seekable:
        flags |= 0x08
    zinfo.flag_bits = flags

    if not zinfo.external_attr:
        zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

    # Compressed size can be larger than uncompressed size
    zip64 = self._allowZip64 and (
        force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

    target = self.fp
    if self._seekable:
        # Position at self.start_dir before writing the new header.
        target.seek(self.start_dir)
    zinfo.header_offset = target.tell()

    self._writecheck(zinfo)
    self._didModify = True

    self.fp.write(zinfo.FileHeader(zip64))

    self._writing = True
    return _ZipWriteFile(self, zinfo, zip64)
1604
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
    using its full name.  Its file information is extracted as accurately
    as possible.  `member' may be a filename or a ZipInfo object.  You can
    specify a different directory using `path'.

    Returns whatever self._extract_member() returns.
    """
    destination = os.getcwd() if path is None else os.fspath(path)
    return self._extract_member(member, destination, pwd)
1617
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
    directory.  `path' specifies a different directory to extract to.
    `members' is optional and must be a subset of the list returned
    by namelist().
    """
    selected = self.namelist() if members is None else members
    destination = os.getcwd() if path is None else os.fspath(path)

    for item in selected:
        self._extract_member(item, destination, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001634
@classmethod
def _sanitize_windows_name(cls, arcname, pathsep):
    """Replace bad characters and remove trailing dots from parts.

    The translation table is built on first use and cached on the class.
    Path components that end up empty are dropped from the result.
    """
    trans = cls._windows_illegal_name_trans_table
    if not trans:
        bad_chars = ':<>|"?*'
        trans = str.maketrans(bad_chars, '_' * len(bad_chars))
        cls._windows_illegal_name_trans_table = trans
    kept = []
    for part in arcname.translate(trans).split(pathsep):
        # remove trailing dots, then skip parts that became empty
        part = part.rstrip('.')
        if part:
            kept.append(part)
    return pathsep.join(kept)
1649
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
    file on the path targetpath.

    'member' may also be a member name, which is resolved with
    self.getinfo().  Returns the normalised path that was written (or
    created, for a directory entry).

    Directory creation tolerates the directory appearing concurrently,
    so parallel extractions into the same tree do not fail with
    FileExistsError.
    """
    if not isinstance(member, ZipInfo):
        member = self.getinfo(member)

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    invalid_path_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                               if x not in invalid_path_parts)
    if os.path.sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, os.path.sep)

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        # exist_ok closes the window between the exists() check and the
        # creation when another thread/process creates the same tree.
        os.makedirs(upperdirs, exist_ok=True)

    if member.is_dir():
        if not os.path.isdir(targetpath):
            try:
                os.mkdir(targetpath)
            except FileExistsError:
                # Created concurrently: fine if it is a directory,
                # still an error if something else occupies the path.
                if not os.path.isdir(targetpath):
                    raise
        return targetpath

    with self.open(member, pwd=pwd) as source, \
         open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1691
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Warns on a duplicate member name.  Raises ValueError for a wrong
    archive mode or a closed archive, and LargeZipFile when ZIP64 would
    be needed but self._allowZip64 is false.
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ('w', 'x', 'a'):
        raise ValueError("write() requires mode 'w', 'x', or 'a'")
    if not self.fp:
        raise ValueError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)
    if self._allowZip64:
        return

    # Without ZIP64, find the first limit (if any) this write would exceed.
    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        reason = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        reason = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        reason = "Zipfile size"
    else:
        return
    raise LargeZipFile(reason +
                       " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001714
def write(self, filename, arcname=None,
          compress_type=None, compresslevel=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    compress_type and compresslevel override the archive-wide settings
    for this member when given.  A directory is stored as a header-only
    entry; a regular file is streamed through self.open(zinfo, 'w').
    Raises ValueError if the archive is closed or a write handle is
    still open.
    """
    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname,
                              strict_timestamps=self._strict_timestamps)

    if not zinfo.is_dir():
        # Regular file: settle the compression settings, then copy.
        zinfo.compress_type = (self.compression if compress_type is None
                               else compress_type)
        zinfo._compresslevel = (self.compresslevel if compresslevel is None
                                else compresslevel)
        with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
            shutil.copyfileobj(src, dest, 1024*8)
        return

    # Directory entry: no payload, only a local header is written.
    zinfo.compress_size = 0
    zinfo.CRC = 0
    with self._lock:
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        self.start_dir = self.fp.tell()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001763
def writestr(self, zinfo_or_arcname, data,
             compress_type=None, compresslevel=None):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    compress_type and compresslevel, when given, override the values
    on the ZipInfo.  Raises ValueError if the archive is closed or a
    write handle is still open.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
        # endswith() instead of [-1] so that an empty archive name does
        # not blow up with IndexError before any real validation runs.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    if compresslevel is not None:
        zinfo._compresslevel = compresslevel

    zinfo.file_size = len(data)            # Uncompressed size
    with self._lock:
        with self.open(zinfo, mode='w') as dest:
            dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001804
def __del__(self):
    """Finaliser: invoke close() in case the caller never did."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001808
def close(self):
    """Close the file, and for mode 'w', 'x' and 'a' write the ending
    records.

    Does nothing if the archive is already closed.  Raises ValueError
    while a write handle is still open.  The underlying file reference
    is released even when writing the ending records fails.
    """
    if self.fp is None:
        return

    if self._writing:
        raise ValueError("Can't close the ZIP file while there is "
                         "an open writing handle on it. "
                         "Close the writing handle before closing the zip.")

    try:
        must_finalize = self.mode in ('w', 'x', 'a') and self._didModify
        if must_finalize:
            # write ending records
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        handle = self.fp
        self.fp = None
        self._fpclose(handle)
1830
def _write_end_record(self):
    """Write the central directory and the end-of-archive records.

    One central directory entry is written per member of self.filelist,
    followed by the ZIP64 end records when a limit is exceeded, then the
    classic end-of-central-directory record and the archive comment.
    Raises LargeZipFile when ZIP64 records are required but
    self._allowZip64 is false.
    """
    out = self.fp
    for zinfo in self.filelist:            # write central directory
        stamp = zinfo.date_time
        dosdate = (stamp[0] - 1980) << 9 | stamp[1] << 5 | stamp[2]
        dostime = stamp[3] << 11 | stamp[4] << 5 | (stamp[5] // 2)

        # Values too large for the 32-bit fields go into a ZIP64 extra
        # field; the fixed-width field then holds the 0xffffffff marker.
        zip64_values = []
        file_size = zinfo.file_size
        compress_size = zinfo.compress_size
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            zip64_values.append(file_size)
            zip64_values.append(compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff

        header_offset = zinfo.header_offset
        if header_offset > ZIP64_LIMIT:
            zip64_values.append(header_offset)
            header_offset = 0xffffffff

        extra_data = zinfo.extra
        min_version = 0
        if zip64_values:
            # Append a ZIP64 field to the extra's
            count = len(zip64_values)
            extra_data = _strip_extra(extra_data, (1,))
            extra_data = struct.pack(
                '<HH' + 'Q'*count,
                1, 8*count, *zip64_values) + extra_data

            min_version = ZIP64_VERSION

        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        filename, flag_bits = zinfo._encodeFilenameFlags()
        centdir = struct.pack(structCentralDir,
                              stringCentralDir, create_version,
                              zinfo.create_system, extract_version,
                              zinfo.reserved, flag_bits,
                              zinfo.compress_type, dostime, dosdate,
                              zinfo.CRC, compress_size, file_size,
                              len(filename), len(extra_data),
                              len(zinfo.comment),
                              0, zinfo.internal_attr, zinfo.external_attr,
                              header_offset)
        out.write(centdir)
        out.write(filename)
        out.write(extra_data)
        out.write(zinfo.comment)

    pos2 = out.tell()
    # Write end-of-zip-archive record
    centDirCount = len(self.filelist)
    centDirSize = pos2 - self.start_dir
    centDirOffset = self.start_dir

    if centDirCount > ZIP_FILECOUNT_LIMIT:
        requires_zip64 = "Files count"
    elif centDirOffset > ZIP64_LIMIT:
        requires_zip64 = "Central directory offset"
    elif centDirSize > ZIP64_LIMIT:
        requires_zip64 = "Central directory size"
    else:
        requires_zip64 = None

    if requires_zip64:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
        zip64endrec = struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, centDirCount, centDirCount,
            centDirSize, centDirOffset)
        out.write(zip64endrec)

        zip64locrec = struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, pos2, 1)
        out.write(zip64locrec)
        # Clamp to what the classic end record's fields can hold.
        centDirCount = min(centDirCount, 0xFFFF)
        centDirSize = min(centDirSize, 0xFFFFFFFF)
        centDirOffset = min(centDirOffset, 0xFFFFFFFF)

    endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, centDirCount, centDirCount,
                         centDirSize, centDirOffset, len(self._comment))
    out.write(endrec)
    out.write(self._comment)
    out.flush()
1922
def _fpclose(self, fp):
    """Drop one reference to the underlying file.

    'fp' is closed once the count reaches zero, unless self._filePassed
    is true.
    """
    assert self._fileRefCnt > 0
    self._fileRefCnt -= 1
    if self._fileRefCnt or self._filePassed:
        return
    fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001928
1929
1930class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001931 """Class to create ZIP archives with Python library files and packages."""
1932
def __init__(self, file, mode="r", compression=ZIP_STORED,
             allowZip64=True, optimize=-1):
    """Initialise the underlying ZipFile and remember 'optimize'.

    file, mode, compression and allowZip64 are forwarded to
    ZipFile.__init__; optimize is stored as self._optimize.
    """
    base_options = dict(mode=mode, compression=compression,
                        allowZip64=allowZip64)
    ZipFile.__init__(self, file, **base_options)
    self._optimize = optimize
1938
Christian Tismer59202e52013-10-21 03:59:23 +02001939 def writepy(self, pathname, basename="", filterfunc=None):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001940 """Add all files from "pathname" to the ZIP archive.
1941
Fred Drake484d7352000-10-02 21:14:52 +00001942 If pathname is a package directory, search the directory and
1943 all package subdirectories recursively for all *.py and enter
1944 the modules into the archive. If pathname is a plain
1945 directory, listdir *.py and enter all modules. Else, pathname
1946 must be a Python *.py file and the module will be put into the
Brett Cannonf299abd2015-04-13 14:21:02 -04001947 archive. Added modules are always module.pyc.
Fred Drake484d7352000-10-02 21:14:52 +00001948 This method will compile the module.py into module.pyc if
1949 necessary.
Christian Tismer59202e52013-10-21 03:59:23 +02001950 If filterfunc(pathname) is given, it is called with every argument.
1951 When it is False, the file or directory is skipped.
Fred Drake484d7352000-10-02 21:14:52 +00001952 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001953 pathname = os.fspath(pathname)
Christian Tismer59202e52013-10-21 03:59:23 +02001954 if filterfunc and not filterfunc(pathname):
1955 if self.debug:
Christian Tismer410d9312013-10-22 04:09:28 +02001956 label = 'path' if os.path.isdir(pathname) else 'file'
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001957 print('%s %r skipped by filterfunc' % (label, pathname))
Christian Tismer59202e52013-10-21 03:59:23 +02001958 return
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001959 dir, name = os.path.split(pathname)
1960 if os.path.isdir(pathname):
1961 initname = os.path.join(pathname, "__init__.py")
1962 if os.path.isfile(initname):
1963 # This is a package directory, add it
1964 if basename:
1965 basename = "%s/%s" % (basename, name)
1966 else:
1967 basename = name
1968 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001969 print("Adding package in", pathname, "as", basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001970 fname, arcname = self._get_codename(initname[0:-3], basename)
1971 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001972 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001973 self.write(fname, arcname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01001974 dirlist = sorted(os.listdir(pathname))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001975 dirlist.remove("__init__.py")
1976 # Add all *.py files and package subdirectories
1977 for filename in dirlist:
1978 path = os.path.join(pathname, filename)
1979 root, ext = os.path.splitext(filename)
1980 if os.path.isdir(path):
1981 if os.path.isfile(os.path.join(path, "__init__.py")):
1982 # This is a package directory, add it
Christian Tismer59202e52013-10-21 03:59:23 +02001983 self.writepy(path, basename,
1984 filterfunc=filterfunc) # Recursive call
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001985 elif ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02001986 if filterfunc and not filterfunc(path):
1987 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001988 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02001989 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001990 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02001991 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001992 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001993 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001994 self.write(fname, arcname)
1995 else:
1996 # This is NOT a package directory, add its files at top level
1997 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001998 print("Adding files from directory", pathname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01001999 for filename in sorted(os.listdir(pathname)):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002000 path = os.path.join(pathname, filename)
2001 root, ext = os.path.splitext(filename)
2002 if ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02002003 if filterfunc and not filterfunc(path):
2004 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03002005 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02002006 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002007 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02002008 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002009 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002010 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002011 self.write(fname, arcname)
2012 else:
2013 if pathname[-3:] != ".py":
Collin Winterce36ad82007-08-30 01:19:48 +00002014 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02002015 'Files added with writepy() must end with ".py"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002016 fname, arcname = self._get_codename(pathname[0:-3], basename)
2017 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002018 print("Adding file", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002019 self.write(fname, arcname)
2020
2021 def _get_codename(self, pathname, basename):
2022 """Return (filename, archivename) for the path.
2023
Fred Drake484d7352000-10-02 21:14:52 +00002024 Given a module name path, return the correct file path and
2025 archive name, compiling if necessary. For example, given
2026 /python/lib/string, return (/python/lib/string.pyc, string).
2027 """
Georg Brandl8334fd92010-12-04 10:26:46 +00002028 def _compile(file, optimize=-1):
2029 import py_compile
2030 if self.debug:
2031 print("Compiling", file)
2032 try:
2033 py_compile.compile(file, doraise=True, optimize=optimize)
Serhiy Storchaka45c43752013-01-29 20:10:28 +02002034 except py_compile.PyCompileError as err:
Georg Brandl8334fd92010-12-04 10:26:46 +00002035 print(err.msg)
2036 return False
2037 return True
2038
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002039 file_py = pathname + ".py"
2040 file_pyc = pathname + ".pyc"
Brett Cannonf299abd2015-04-13 14:21:02 -04002041 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2042 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2043 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
Georg Brandl8334fd92010-12-04 10:26:46 +00002044 if self._optimize == -1:
2045 # legacy mode: use whatever file is present
Brett Cannonf299abd2015-04-13 14:21:02 -04002046 if (os.path.isfile(file_pyc) and
Georg Brandl8334fd92010-12-04 10:26:46 +00002047 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2048 # Use .pyc file.
2049 arcname = fname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002050 elif (os.path.isfile(pycache_opt0) and
2051 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
Georg Brandl8334fd92010-12-04 10:26:46 +00002052 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2053 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002054 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002055 arcname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002056 elif (os.path.isfile(pycache_opt1) and
2057 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2058 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002059 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002060 fname = pycache_opt1
2061 arcname = file_pyc
2062 elif (os.path.isfile(pycache_opt2) and
2063 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2064 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2065 # file name in the archive.
2066 fname = pycache_opt2
2067 arcname = file_pyc
Barry Warsaw28a691b2010-04-17 00:19:56 +00002068 else:
Georg Brandl8334fd92010-12-04 10:26:46 +00002069 # Compile py into PEP 3147 pyc file.
2070 if _compile(file_py):
Brett Cannonf299abd2015-04-13 14:21:02 -04002071 if sys.flags.optimize == 0:
2072 fname = pycache_opt0
2073 elif sys.flags.optimize == 1:
2074 fname = pycache_opt1
2075 else:
2076 fname = pycache_opt2
2077 arcname = file_pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002078 else:
2079 fname = arcname = file_py
2080 else:
2081 # new mode: use given optimization level
2082 if self._optimize == 0:
Brett Cannonf299abd2015-04-13 14:21:02 -04002083 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002084 arcname = file_pyc
2085 else:
Brett Cannonf299abd2015-04-13 14:21:02 -04002086 arcname = file_pyc
2087 if self._optimize == 1:
2088 fname = pycache_opt1
2089 elif self._optimize == 2:
2090 fname = pycache_opt2
2091 else:
2092 msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2093 raise ValueError(msg)
Georg Brandl8334fd92010-12-04 10:26:46 +00002094 if not (os.path.isfile(fname) and
2095 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2096 if not _compile(file_py, optimize=self._optimize):
2097 fname = arcname = file_py
Barry Warsaw28a691b2010-04-17 00:19:56 +00002098 archivename = os.path.split(arcname)[1]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002099 if basename:
2100 archivename = "%s/%s" % (basename, archivename)
2101 return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002102
2103
def _parents(path):
    """
    Generate every proper ancestor of a posixpath.sep-separated
    path, nearest ancestor first; the path itself is excluded.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    lineage = _ancestry(path)
    # The ancestry starts with the path itself; skip that first element.
    return itertools.islice(lineage, 1, None)
2121
2122
2123def _ancestry(path):
2124 """
2125 Given a path with elements separated by
2126 posixpath.sep, generate all elements of that path
2127
2128 >>> list(_ancestry('b/d'))
2129 ['b/d', 'b']
2130 >>> list(_ancestry('/b/d/'))
2131 ['/b/d', '/b']
2132 >>> list(_ancestry('b/d/f/'))
2133 ['b/d/f', 'b/d', 'b']
2134 >>> list(_ancestry('b'))
2135 ['b']
2136 >>> list(_ancestry(''))
2137 []
2138 """
2139 path = path.rstrip(posixpath.sep)
2140 while path and path != posixpath.sep:
2141 yield path
2142 path, tail = posixpath.split(path)
2143
2144
Jason R. Coombs0aeab5c2020-02-29 10:34:11 -06002145_dedupe = dict.fromkeys
2146"""Deduplicate an iterable in original order"""
2147
2148
2149def _difference(minuend, subtrahend):
2150 """
2151 Return items in minuend not in subtrahend, retaining order
2152 with O(1) lookup.
2153 """
2154 return itertools.filterfalse(set(subtrahend).__contains__, minuend)
2155
2156
class CompleteDirs(ZipFile):
    """
    A ZipFile subclass whose namelist also reports directories that
    are only implied by the paths of other entries.
    """

    @staticmethod
    def _implied_dirs(names):
        """
        Return the directories implied by names but absent from it,
        each with a trailing separator, deduplicated in discovery order.
        """
        ancestors = itertools.chain.from_iterable(
            _parents(entry) for entry in names)
        dir_names = (ancestor + posixpath.sep for ancestor in ancestors)
        missing = _difference(dir_names, names)
        return _dedupe(missing)

    def namelist(self):
        """Return the archive's names followed by its implied directories."""
        listed = super().namelist()
        implied = self._implied_dirs(listed)
        return listed + list(implied)

    def _name_set(self):
        """Return namelist() as a set for membership tests."""
        return set(self.namelist())

    def resolve_dir(self, name):
        """
        Return name with a trailing slash when only the slash-suffixed
        form is present in the archive; otherwise return name unchanged.
        """
        known = self._name_set()
        as_dir = name + '/'
        if name not in known and as_dir in known:
            return as_dir
        return name

    @classmethod
    def make(cls, source):
        """
        Given a source (filename or zipfile), return an
        appropriate CompleteDirs subclass.

        A CompleteDirs instance is returned as is.  Anything that is not
        a ZipFile is passed to the class constructor.  A plain ZipFile is
        re-wrapped without reopening it by copying its instance state.
        """
        if isinstance(source, CompleteDirs):
            return source

        if not isinstance(source, ZipFile):
            return cls(source)

        # Only use the requested subclass when the supplied zipfile is
        # read-only; otherwise fall back to plain CompleteDirs.
        target = cls if 'r' in source.mode else CompleteDirs

        wrapped = target.__new__(target)
        vars(wrapped).update(vars(source))
        return wrapped
2205
2206
class FastLookup(CompleteDirs):
    """
    CompleteDirs variant that computes the name list and the name
    set once and serves every later request from the stored result.
    """
    def namelist(self):
        """Return the name list, computing and storing it on first use."""
        try:
            return self.__names
        except AttributeError:
            pass
        self.__names = super().namelist()
        return self.__names

    def _name_set(self):
        """Return the name set, computing and storing it on first use."""
        try:
            return self.__lookup
        except AttributeError:
            pass
        self.__lookup = super()._name_set()
        return self.__lookup
2223
2224
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Only entries that exist are reported as files:

    >>> c.is_file()
    True
    >>> (b / 'missing.txt').is_file()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """
        Wrap *root* (a filename, file object or ZipFile) and point at
        the entry named *at*; the empty string denotes the archive root.
        """
        self.root = FastLookup.make(root)
        self.at = at

    def open(self, mode='r', *args, pwd=None, **kwargs):
        """
        Open this entry as text or binary following the semantics
        of ``pathlib.Path.open()`` by passing arguments through
        to io.TextIOWrapper().

        Only the first character of *mode* is handed to the archive;
        a 'b' anywhere in *mode* selects the raw binary stream.  *pwd*
        is forwarded to the archive's open().  Raises ValueError when
        extra positional or keyword arguments accompany a binary mode.
        """
        zip_mode = mode[0]
        stream = self.root.open(self.at, zip_mode, pwd=pwd)
        if 'b' in mode:
            if args or kwargs:
                raise ValueError("encoding args invalid for binary operation")
            return stream
        return io.TextIOWrapper(stream, *args, **kwargs)

    @property
    def name(self):
        """The final path component, without any trailing slash."""
        return posixpath.basename(self.at.rstrip("/"))

    def read_text(self, *args, **kwargs):
        """Return the entry's content as str; arguments go to open()."""
        with self.open('r', *args, **kwargs) as strm:
            return strm.read()

    def read_bytes(self):
        """Return the entry's content as bytes."""
        with self.open('rb') as strm:
            return strm.read()

    def _is_child(self, path):
        """Tell whether *path* sits directly inside this directory."""
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        """Create a sibling object for entry *at* in the same archive."""
        # Use the instance's own class so that subclasses of Path keep
        # their type across '/', parent and iterdir().
        return self.__class__(self.root, at)

    def is_dir(self):
        """True for the archive root and for names ending in a slash."""
        return not self.at or self.at.endswith("/")

    def is_file(self):
        """True only for an existing entry that is not a directory."""
        # A missing entry is neither a file nor a directory, so check
        # existence first instead of merely negating is_dir().
        return self.exists() and not self.is_dir()

    def exists(self):
        """Tell whether the entry name is present in the archive."""
        return self.at in self.root._name_set()

    def iterdir(self):
        """
        Iterate over the direct children of this directory.

        Raises ValueError when this path is not a directory.
        """
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self.root.namelist())
        return filter(self._is_child, subs)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, add):
        """
        Return the path for *add* below this one.  If the joined name
        denotes a directory in the archive, the result carries the
        trailing slash.
        """
        joined = posixpath.join(self.at, add)
        return self._next(self.root.resolve_dir(joined))

    __truediv__ = joinpath

    @property
    def parent(self):
        """The containing directory; the archive root has at == ''."""
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)
2362
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04002363
def main(args=None):
    """Run the command-line interface of the module.

    *args* is the argument list handed to argparse (None makes argparse
    use its default).  Exactly one of -l/--list, -e/--extract,
    -c/--create or -t/--test must be given.
    """
    import argparse

    description = 'A simple command-line interface for zipfile module.'
    parser = argparse.ArgumentParser(description=description)
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    group.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    group.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    options = parser.parse_args(args)

    if options.test is not None:
        with ZipFile(options.test, 'r') as archive:
            corrupted = archive.testzip()
        if corrupted:
            print("The following enclosed file is corrupted: {!r}".format(corrupted))
        print("Done testing")
        return

    if options.list is not None:
        with ZipFile(options.list, 'r') as archive:
            archive.printdir()
        return

    if options.extract is not None:
        source, target_dir = options.extract
        with ZipFile(source, 'r') as archive:
            archive.extractall(target_dir)
        return

    if options.create is None:
        return

    # The first value is the archive to create, the rest are its sources.
    zip_name, *sources = options.create

    def add_entry(archive, path, zippath):
        # Regular files are deflated; directories are stored as an entry
        # (when they have a name) and then walked in sorted order.
        if os.path.isfile(path):
            archive.write(path, zippath, ZIP_DEFLATED)
        elif os.path.isdir(path):
            if zippath:
                archive.write(path, zippath)
            for child in sorted(os.listdir(path)):
                add_entry(archive,
                          os.path.join(path, child),
                          os.path.join(zippath, child))
        # anything else is ignored

    with ZipFile(zip_name, 'w') as archive:
        for path in sources:
            zippath = os.path.basename(path)
            if not zippath:
                # path ended with a separator: name it after its directory.
                zippath = os.path.basename(os.path.dirname(path))
            if zippath in ('', os.curdir, os.pardir):
                zippath = ''
            add_entry(archive, path, zippath)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002423
Jason R. Coombse5bd7362020-02-11 21:58:47 -05002424
# Run the command-line interface when the module is executed as a script.
if __name__ == "__main__":
    main()