blob: 8b99c1189baa8fe4c88b42f751d48e43f1279a8b [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04006import binascii
7import functools
8import importlib.util
Antoine Pitroua32f9a22010-01-27 21:18:57 +00009import io
Miss Islington (bot)c410f382019-08-24 09:03:52 -070010import itertools
Barry Warsaw28a691b2010-04-17 00:19:56 +000011import os
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040012import posixpath
Barry Warsaw28a691b2010-04-17 00:19:56 +000013import shutil
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040014import stat
Barry Warsaw28a691b2010-04-17 00:19:56 +000015import struct
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040016import sys
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020017import threading
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040018import time
Guido van Rossum32abe6f2000-03-31 17:30:02 +000019
20try:
Tim Peterse1190062001-01-15 03:34:38 +000021 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000022 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040023except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000025 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000026
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020027try:
28 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040029except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020030 bz2 = None
31
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020032try:
33 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040034except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020035 lzma = None
36
# Public API of this module, one name per line.
__all__ = [
    "BadZipFile",
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "ZIP_BZIP2",
    "ZIP_LZMA",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
    "LargeZipFile",
]
Skip Montanaro40fc1602001-03-01 04:27:19 +000040
class BadZipFile(Exception):
    """Raised by this module for archives it cannot process.

    Examples in this file include corrupt extra fields and archives that
    span multiple disks.
    """


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


# Pre-3.2 compatibility names
BadZipfile = BadZipFile
error = BadZipFile
52
Guido van Rossum32abe6f2000-03-31 17:30:02 +000053
# Numeric limits
ZIP64_LIMIT = 2**31 - 1
ZIP_FILECOUNT_LIMIT = 2**16 - 1
ZIP_MAX_COMMENT = 2**16 - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\x05\x06"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields inside an unpacked "end of central directory"
# record.  The last two indices (_ECD_COMMENT and _ECD_LOCATION) are not
# part of the structure as defined in the spec, but they are used
# internally by this module as a convenience.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE,
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\x01\x02"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\x03\x04"
sizeFileHeader = struct.calcsize(structFileHeader)

(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)

_DD_SIGNATURE = 0x08074b50

# Pre-compiled layout of an extra-field header: (id, length), both 16-bit
# little-endian.
_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
168
def _strip_extra(extra, xids):
    """Return *extra* with the extra fields whose IDs are in *xids* removed.

    *extra* is a bytes-like sequence of extra fields, each one a 4-byte
    little-endian header (16-bit ID, 16-bit payload length) followed by
    the payload.  *xids* is a container of the IDs to drop.

    If no field matches, the original *extra* object is returned unchanged.
    Otherwise a new bytes object is returned containing every field that
    was not removed, in the original order, including any data located
    after the last removed field.
    """
    kept = []
    modified = False
    # 'start' marks the beginning of the current run of bytes to keep,
    # 'i' is the offset of the field header being examined.
    start = i = 0
    while i + 4 <= len(extra):
        xid, xlen = struct.unpack_from('<HH', extra, i)
        j = i + 4 + xlen
        if xid in xids:
            if i != start:
                kept.append(extra[start:i])
            start = j
            modified = True
        i = j
    if not modified:
        return extra
    # Keep whatever follows the last removed field; without this the
    # fields after it would be silently discarded.
    if start < len(extra):
        kept.append(extra[start:])
    return b''.join(kept)
187
def _check_zipfile(fp):
    """Return True if an "End of Central Directory" record is found in *fp*.

    Any OSError raised while probing the file object is treated as
    "not a ZIP file" and yields False.
    """
    try:
        found = bool(_EndRecData(fp))   # truthy => file has correct magic number
    except OSError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000195
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too: anything
    exposing a ``read`` attribute is probed directly, everything else is
    opened in binary mode first.  An OSError at any point results in False.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except OSError:
        pass
    return verdict
211
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    *offset* is the (negative) position of the classic end-of-central-
    directory record relative to the end of the file.  The ZIP64 locator is
    looked for immediately before that position and the ZIP64 record
    immediately before the locator.  If either is absent or truncated,
    *endrec* is returned untouched; otherwise its first seven items are
    overwritten with the ZIP64 values.  BadZipFile is raised for archives
    that span multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    raw = fpin.read(sizeEndCentDir64Locator)
    if len(raw) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, _reloff, disks = struct.unpack(structEndArchive64Locator,
                                                  raw)
    if magic != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    raw = fpin.read(sizeEndCentDir64)
    if len(raw) != sizeEndCentDir64:
        return endrec
    record = struct.unpack(structEndArchive64, raw)
    if record[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = record[_CD64_SIGNATURE]
    endrec[_ECD_DISK_NUMBER] = record[_CD64_DISK_NUMBER]
    endrec[_ECD_DISK_START] = record[_CD64_DISK_NUMBER_START]
    endrec[_ECD_ENTRIES_THIS_DISK] = record[_CD64_NUMBER_ENTRIES_THIS_DISK]
    endrec[_ECD_ENTRIES_TOTAL] = record[_CD64_NUMBER_ENTRIES_TOTAL]
    endrec[_ECD_SIZE] = record[_CD64_DIRECTORY_SIZE]
    endrec[_ECD_OFFSET] = record[_CD64_OFFSET_START_CENTDIR]
    return endrec
253
254
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: a ZIP file with no archive comment has the "end of central
    # directory" structure as the very last item in the file.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    has_bare_record = (len(tail) == sizeEndCentDir
                       and tail[0:4] == stringEndArchive
                       and tail[-2:] == b"\000\000")
    if has_bare_record:
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, tail))

        # Append a blank comment and record start offset
        endrec += [b"", filesize - sizeEndCentDir]

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    search_start = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(search_start, 0)
    window = fpin.read()
    pos = window.rfind(stringEndArchive)
    if pos < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    record_end = pos + sizeEndCentDir
    raw = window[pos:record_end]
    if len(raw) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, raw))
    comment_size = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    endrec.append(window[record_end:record_end + comment_size])
    endrec.append(search_start + pos)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, search_start + pos - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000314
Fred Drake484d7352000-10-02 21:14:52 +0000315
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create a member description with default attribute values.

        *filename* is truncated at the first null character and has
        ``os.sep`` replaced by "/"; the untouched value is kept in
        ``orig_filename``.  *date_time* is a (year, month, day, hour, min,
        sec) sequence; ValueError is raised if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Return a compact description listing only the non-default fields."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The upper 16 bits of external_attr are shown as a file mode, the
        # lower 16 bits as a raw hex value.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        *zip64* selects whether a ZIP64 extra field is appended: when None
        it is decided from the sizes (either one above ZIP64_LIMIT).  If a
        size exceeds ZIP64_LIMIT while *zip64* is false, LargeZipFile is
        raised.

        As a side effect ``extract_version`` and ``create_version`` are
        raised to the minimum version implied by ZIP64 use and by the
        compression type.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # Extra field 0x0001: header (id, length) + the two 64-bit sizes.
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        The name is encoded as ASCII when possible; otherwise it is encoded
        as UTF-8 and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Parse ``self.extra`` and apply any ZIP64 (0x0001) field found.

        For each of ``file_size``, ``compress_size`` and ``header_offset``
        that currently holds its "see ZIP64 field" marker value, the real
        64-bit value is taken, in that order, from the ZIP64 extra field.
        Raises BadZipFile when a field overruns the buffer, when the ZIP64
        field has an unexpected length, or when it lacks a needed value.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                # 'idx' is the next unconsumed value in 'counts'.
                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. File size not found."
                        )
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. Compress size not found."
                        )
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. Header offset not found."
                        )
                    self.header_offset = counts[idx]
                    idx += 1

            # Advance to the next extra field.
            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).

        When strict_timestamps is false, a modification time whose year is
        before 1980 or after 2107 is replaced by the nearest limit instead
        of being passed through unchanged.
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory.

        A member is a directory when its name ends with "/".  An empty
        name is not a directory.
        """
        # endswith() rather than [-1] so an empty filename yields False
        # instead of raising IndexError.
        return self.filename.endswith('/')
550
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000551
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300552# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
553# internal keys. We noticed that a direct implementation is faster than
554# relying on binascii.crc32().
Thomas Wouterscf297e42007-02-23 15:07:44 +0000555
# Lookup table of 256 CRC values; built on first use by _ZipDecrypter.
_crctable = None


def _gen_crc(crc):
    """Run eight shift/xor rounds of the 0xEDB88320 polynomial on *crc*."""
    for _ in range(8):
        low_bit = crc & 1
        crc >>= 1
        if low_bit:
            crc ^= 0xEDB88320
    return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000564
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300565# ZIP supports a password-based form of encryption. Even though known
566# plaintext attacks have been found against it, it is still useful
567# to be able to get data out of such a file.
568#
569# Usage:
570# zd = _ZipDecrypter(mypwd)
571# plain_bytes = zd(cypher_bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000572
def _ZipDecrypter(pwd):
    """Return a function that decrypts successive chunks of bytes.

    *pwd* is an iterable of integers (e.g. a bytes object) used to seed the
    three internal keys.  The returned callable takes an iterable of
    integers and returns the decrypted data as bytes; the key state is
    carried over from one call to the next.
    """
    global _crctable
    if _crctable is None:
        _crctable = [_gen_crc(index) for index in range(256)]
    table = _crctable

    # key0, key1, key2 (305419896, 591751049, 878082192 in decimal)
    state = [0x12345678, 0x23456789, 0x34567890]

    def _mix(byte):
        """Fold one plaintext byte into the three keys."""
        k0, k1, k2 = state
        # CRC32 primitive on one byte
        k0 = (k0 >> 8) ^ table[(k0 ^ byte) & 0xFF]
        k1 = (k1 + (k0 & 0xFF)) & 0xFFFFFFFF
        k1 = (k1 * 134775813 + 1) & 0xFFFFFFFF
        top = k1 >> 24
        k2 = (k2 >> 8) ^ table[(k2 ^ top) & 0xFF]
        state[:] = k0, k1, k2

    for byte in pwd:
        _mix(byte)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        for byte in data:
            k = state[2] | 2
            byte ^= ((k * (k ^ 1)) >> 8) & 0xFF
            _mix(byte)
            plain.append(byte)
        return bytes(plain)

    return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000609
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200610
class LZMACompressor:
    """Raw LZMA1 compressor whose output starts with a small header.

    The header is emitted together with the first output and consists of
    the bytes 9 and 4, the 16-bit little-endian length of the encoded
    filter properties, and the properties themselves.
    """

    def __init__(self):
        # The underlying compressor is created lazily on first use.
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        lzma_filter = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[lzma_filter])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*, prefixing the header on the first call."""
        header = b''
        if self._comp is None:
            header = self._init()
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream, prefixing the header if nothing was written."""
        header = b''
        if self._comp is None:
            header = self._init()
        return header + self._comp.flush()
632
633
class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by a properties header.

    Input is buffered until the 4-byte header and the filter properties
    it announces, plus at least one further byte, are available; only
    then is the underlying decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Feed *data* and return whatever decompressed bytes are available."""
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            # Bytes 2-3 of the header hold the properties length.
            (props_len,) = struct.unpack('<H', pending[2:4])
            body_start = 4 + props_len
            if len(pending) <= body_start:
                return b''

            lzma_filter = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[lzma_filter])
            data = pending[body_start:]
            del self._unconsumed

        output = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return output
659 return result
660
661
# Human-readable names for ZIP compression method numbers (used when
# building ZipInfo.__repr__).
compressor_names = {
    0: 'store',
    1: 'shrink',
    **dict.fromkeys(range(2, 6), 'reduce'),
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
681
def _check_compression(compression):
    """Validate that *compression* is a method this module can use.

    Returns None on success.

    Raises:
        RuntimeError: the method is known but the module implementing
            it (zlib, bz2 or lzma) could not be imported.
        NotImplementedError: the method is not one of ZIP_STORED,
            ZIP_DEFLATED, ZIP_BZIP2 or ZIP_LZMA.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        return
    if compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
        return
    if compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
        return
    raise NotImplementedError("That compression method is not supported")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200699
700
def _get_compressor(compress_type, compresslevel=None):
    """Return a new compressor object for *compress_type*.

    *compresslevel* is forwarded to the zlib and bz2 compressors when
    it is not None; it is ignored for ZIP_LZMA.  For any other method
    (including ZIP_STORED) None is returned, meaning "write the data
    as-is".
    """
    if compress_type == ZIP_DEFLATED:
        level = (zlib.Z_DEFAULT_COMPRESSION if compresslevel is None
                 else compresslevel)
        # wbits=-15 selects a raw deflate stream (no zlib header/trailer).
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()
    return None
715
716
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    Returns None for ZIP_STORED (the data needs no decompression).

    Raises:
        NotImplementedError: *compress_type* is not a method this
            module implements.  The message carries the numeric method
            and, when known, its name from ``compressor_names``.
        RuntimeError: the method is implemented but the module backing
            it (zlib, bz2 or lzma) is missing.
    """
    # Report unknown methods first.  _check_compression() raises its own
    # generic NotImplementedError for them, which would otherwise make
    # the more informative message below unreachable.
    if compress_type not in (ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2, ZIP_LZMA):
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError(
                "compression type %d (%s)" % (compress_type, descr))
        raise NotImplementedError("compression type %d" % (compress_type,))

    # Known method: make sure the backing module was importable.
    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # wbits=-15 selects a raw deflate stream (no zlib header/trailer).
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    return LZMADecompressor()
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200733
734
class _SharedFile:
    """A read handle with its own position on a file shared with others.

    Several handles may wrap the same underlying file object.  Each one
    remembers its own offset in ``_pos`` and repositions the underlying
    file before every read, so handles do not disturb each other.

    Parameters:
        file: the underlying (shared) file object.
        pos: initial offset of this handle within *file*.
        close: callable invoked with the file object when this handle
            is closed.
        lock: context manager held around every seek/read on *file*.
        writing: zero-argument callable; when it returns true, reading
            and seeking are refused with ValueError.
    """

    def __init__(self, file, pos, close, lock, writing):
        self._file = file
        self._pos = pos
        self._close = close
        self._lock = lock
        self._writing = writing
        self.seekable = file.seekable

    def tell(self):
        """Return this handle's own offset.

        The underlying file's position is not used: it is shared and may
        have been moved by another handle since our last operation.
        """
        return self._pos

    def seek(self, offset, whence=0):
        """Move this handle and return its new absolute offset."""
        with self._lock:
            if self._writing():
                raise ValueError("Can't reposition in the ZIP file while "
                        "there is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            if whence == os.SEEK_CUR:
                # "Current position" means this handle's position, not
                # wherever the shared file happens to be right now.
                self._file.seek(self._pos + offset)
            else:
                self._file.seek(offset, whence)
            self._pos = self._file.tell()
            return self._pos

    def read(self, n=-1):
        """Read up to *n* bytes starting at this handle's offset."""
        with self._lock:
            if self._writing():
                raise ValueError("Can't read from the ZIP file while there "
                        "is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            self._file.seek(self._pos)
            data = self._file.read(n)
            self._pos = self._file.tell()
            return data

    def close(self):
        """Release the underlying file via the close callback (once only)."""
        if self._file is not None:
            fileobj = self._file
            # Clear the reference first so a repeated close() is a no-op.
            self._file = None
            self._close(fileobj)
771
# Provide the tell method for unseekable stream
class _Tellable:
    """Write-only wrapper that tracks how many bytes have been written.

    ``tell()`` reports the running total of the values returned by the
    wrapped object's ``write()``; the wrapped object itself is never
    asked for its position.
    """

    def __init__(self, fp):
        self.fp = fp
        self.offset = 0

    def write(self, data):
        """Write *data* to the wrapped object and return the count it reports."""
        written = self.fp.write(data)
        self.offset += written
        return written

    def tell(self):
        """Return the number of bytes written through this wrapper."""
        return self.offset

    def flush(self):
        """Flush the wrapped object."""
        self.fp.flush()

    def close(self):
        """Close the wrapped object."""
        self.fp.close()
791
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200792
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().

    Data is pulled from the underlying file object in chunks, optionally
    decrypted, decompressed, and kept in ``_readbuffer``; ``_offset`` is
    the index of the next unread byte inside that buffer.
    """

    # Max size supported by decompressor.
    # Parenthesised on purpose: "-" binds tighter than "<<", so the
    # unparenthesised form "1 << 31 - 1" evaluates to 1 << 30.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, pwd=None,
                 close_fileobj=False):
        """Set up reading of the member described by *zipinfo*.

        *fileobj* must be positioned at the start of the member's data.
        *pwd*, when given, is used to decrypt the member; RuntimeError is
        raised if the password check byte does not match.  When
        *close_fileobj* is true, closing this object also closes
        *fileobj*.
        """
        self._fileobj = fileobj
        self._pwd = pwd
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size   # compressed bytes not yet read
        self._left = zipinfo.file_size                # uncompressed bytes not yet produced

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        self._seekable = False
        try:
            if fileobj.seekable():
                # Remember the initial state so that a backward seek can
                # rewind and decompress again from the start.
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            # Missing seekable()/tell() (or no running CRC): stay unseekable.
            pass

        self._decrypter = None
        if pwd:
            if zipinfo.flag_bits & 0x8:
                # compare against the high byte of the raw modification
                # time (archives using extended local headers)
                check_byte = (zipinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zipinfo.CRC >> 24) & 0xff
            h = self._init_decrypter()
            if h != check_byte:
                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)

    def _init_decrypter(self):
        """Create the decrypter, consume the 12-byte header, return its last byte."""
        self._decrypter = _ZipDecrypter(self._pwd)
        # The first 12 bytes in the cypher stream is an encryption header
        #  used to strengthen the algorithm. The first 11 bytes are
        #  completely random, while the 12th contains the MSB of the CRC,
        #  or the MSB of the file time depending on the header type
        #  and is used to check the correctness of the password.
        header = self._fileobj.read(12)
        self._compress_left -= 12
        return self._decrypter(header)[11]

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Push the bytes just read back in front of the buffer so the
            # logical position is unchanged.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Fully satisfied from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than asked for: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n=-1):
        """Read up to n bytes with at most one read() system call.

        If n is omitted, None, or negative, the buffered data plus the
        result of at most one non-empty underlying read is returned.
        The default matches io.BufferedIOBase.read1().
        """

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            # Loop only to skip reads that decompress to nothing; stop at
            # the first chunk that yields data.
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read (and decrypt) between MIN_READ_SIZE and n raw bytes from
        # the underlying file, never going past the member's data.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            # The file ended before the declared compressed size.
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        return self._seekable

    def seek(self, offset, whence=0):
        """Move to a position in the uncompressed data and return it.

        The target is clamped to [0, file size].  A backward seek outside
        the current buffer restarts decompression from the beginning of
        the member; a forward seek reads and discards data.
        """
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        # Skip forward by reading and discarding in bounded chunks.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        """Return the current position in the uncompressed data."""
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        # Bytes produced so far, minus what is still unread in the buffer.
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos
1110
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001111
class _ZipWriteFile(io.BufferedIOBase):
    """Write-only file object for adding one member to an archive.

    Bytes passed to write() are (optionally) compressed and written
    straight to the owning ZipFile's file object while the CRC and
    sizes are accumulated.  close() finalises the member: it records
    the CRC and sizes on the ZipInfo, writes either a trailing data
    descriptor or a rewritten local header, and registers the member
    with the ZipFile.
    """

    def __init__(self, zf, zinfo, zip64):
        """*zf* is the owning ZipFile, *zinfo* the member's ZipInfo.

        *zip64* selects 64-bit size fields for the data descriptor and
        is passed on to ``zinfo.FileHeader()``.
        """
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0
        self._compress_size = 0
        self._crc = 0

    @property
    def _fileobj(self):
        # Looked up on every access rather than cached at construction.
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Write *data* (any bytes-like object) and return its size in bytes."""
        if self.closed:
            raise ValueError('I/O operation on closed file.')

        # Accept any object supporting the buffer protocol.  len() counts
        # items, not bytes, for buffers whose itemsize is larger than 1
        # (e.g. array.array('H')), so take the size from a memoryview.
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes

        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        """Finalise the member; safe to call more than once."""
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'File size unexpectedly exceeded ZIP64 limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'Compressed size unexpectedly exceeded ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            # Always release the archive's "writing" flag, even on error.
            self._zipfile._writing = False
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001187
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001188
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001189
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001190class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001191 """ Class with methods to open, read, write, close, list zip files.
1192
Bo Baylesce237c72018-01-29 23:54:07 -06001193 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1194 compresslevel=None)
Tim Petersa19a1682001-03-29 04:36:09 +00001195
Fred Drake3d9091e2001-03-26 15:49:24 +00001196 file: Either the path to the file, or a file-like object.
1197 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001198 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1199 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001200 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1201 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001202 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1203 needed, otherwise it will raise an exception when this would
1204 be necessary.
Bo Baylesce237c72018-01-29 23:54:07 -06001205 compresslevel: None (default for the given compression type) or an integer
1206 specifying the level to pass to the compressor.
1207 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1208 When using ZIP_DEFLATED integers 0 through 9 are accepted.
1209 When using ZIP_BZIP2 integers 1 through 9 are accepted.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001210
Fred Drake3d9091e2001-03-26 15:49:24 +00001211 """
Fred Drake484d7352000-10-02 21:14:52 +00001212
Fred Drake90eac282001-02-28 05:29:34 +00001213 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001214 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001215
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
             compresslevel=None, *, strict_timestamps=True):
    """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
    or append 'a'.

    *file* may be a path (str or os.PathLike), which is opened here, or
    an already open file-like object, which is used as given.

    Raises ValueError for an unknown mode, and whatever
    _check_compression() raises for an unusable compression method.
    If setting up the archive fails, the file is released again before
    the exception propagates.
    """
    if mode not in ('r', 'w', 'x', 'a'):
        raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.compresslevel = compresslevel
    self.mode = mode
    self.pwd = None
    self._comment = b''
    self._strict_timestamps = strict_timestamps

    # Check if we were passed a file-like object
    if isinstance(file, os.PathLike):
        file = os.fspath(file)
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        # Maps the requested mode to the first open() mode to try, and
        # each open() mode to the next one to try if it fails.
        mode_fallbacks = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                          'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
        open_mode = mode_fallbacks[mode]
        while True:
            try:
                self.fp = io.open(file, open_mode)
                break
            except OSError:
                if open_mode not in mode_fallbacks:
                    # No further fallback: report the failure.
                    raise
                open_mode = mode_fallbacks[open_mode]
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    self._fileRefCnt = 1
    self._lock = threading.RLock()
    self._seekable = True
    self._writing = False

    try:
        if mode == 'r':
            self._RealGetContents()
        elif mode in ('w', 'x'):
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
            try:
                self.start_dir = self.fp.tell()
            except (AttributeError, OSError):
                # No usable tell(): count written bytes ourselves.
                self.fp = _Tellable(self.fp)
                self.start_dir = 0
                self._seekable = False
            else:
                # Some file-like objects can provide tell() but not seek()
                try:
                    self.fp.seek(self.start_dir)
                except (AttributeError, OSError):
                    self._seekable = False
        elif mode == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                self.start_dir = self.fp.tell()
        else:
            raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
    except:
        # Deliberately catches everything: release the file, then let
        # the original exception continue.
        failed_fp = self.fp
        self.fp = None
        self._fpclose(failed_fp)
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001305
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is the target."""
    return self
1308
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    Returns None, so an exception raised inside the block is not
    suppressed.
    """
    self.close()
1311
def __repr__(self):
    """Return '<module.Class ...>' describing the archive's state.

    An open archive shows the file object (when one was passed in) or
    the filename (when known), followed by the mode; a closed archive
    shows '[closed]'.
    """
    cls = self.__class__
    parts = ['<%s.%s' % (cls.__module__, cls.__qualname__)]
    if self.fp is None:
        parts.append(' [closed]')
    else:
        if self._filePassed:
            parts.append(' file=%r' % self.fp)
        elif self.filename is not None:
            parts.append(' filename=%r' % self.filename)
        parts.append(' mode=%r' % self.mode)
    parts.append('>')
    return ''.join(parts)
1325
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record with _EndRecData(), reads
    the central directory from self.fp and builds one ZipInfo per entry.
    Each entry is appended to self.filelist and registered in
    self.NameToInfo under its filename.  Also sets self._comment and
    self.start_dir.

    Raises BadZipFile when the end record cannot be read or found, when
    the computed central directory position is negative, or when a
    central directory entry is truncated or has a bad signature.  Raises
    NotImplementedError when an entry requires an extraction version
    above MAX_EXTRACT_VERSION.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    if self.start_dir < 0:
        # Inconsistent size/offset fields in the end record; report it as
        # a corrupt archive rather than letting seek() fail with an
        # unrelated exception type.
        raise BadZipFile("Bad offset for central directory")
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory; "fp" now refers to the buffer.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if len(centdir) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Index 5 is the same field that is unpacked into x.flag_bits below.
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        x._decodeExtra()
        # Shift the stored offset by the size of any data prepended to
        # the archive.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001402
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001403
def namelist(self):
    """Return the archive member names as a new list.

    Names come from the ZipInfo objects in self.filelist, in that order.
    """
    return [info.filename for info in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001407
def infolist(self):
    """Return the list of ZipInfo instances for the archive members.

    The returned object is self.filelist itself, not a copy.
    """
    members = self.filelist
    return members
1412
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    Writes a header line followed by one line per entry of self.filelist
    (name, modification time, uncompressed size).  'file' is passed
    through to print(); None means standard output.
    """
    # NOTE(review): the "Modified " label is reproduced exactly as it
    # appears in this listing; confirm its trailing padding against the
    # canonical source.
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001421
1422 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001423 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001424 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001425 for zinfo in self.filelist:
1426 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001427 # Read by chunks, to avoid an OverflowError or a
1428 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001429 with self.open(zinfo.filename, "r") as f:
1430 while f.read(chunk_size): # Check CRC-32
1431 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001432 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001433 return zinfo.filename
1434
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when self.NameToInfo has no entry for 'name'.
    """
    info = self.NameToInfo.get(name)
    if info is not None:
        return info
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001443
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A truthy 'pwd' must be bytes, otherwise TypeError is raised.  Any
    falsy value stores None in self.pwd.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001452
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Only bytes are accepted for the archive comment.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
    # check for valid comment length
    too_long = len(comment) > ZIP_MAX_COMMENT
    if too_long:
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT, stacklevel=2)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    # Record that the archive was modified (close() checks this flag).
    self._didModify = True
1470
def read(self, name, pwd=None):
    """Return file bytes for name.

    Opens the member through self.open() in mode "r", forwarding 'pwd',
    and returns everything its read() yields.
    """
    with self.open(name, "r", pwd) as member:
        payload = member.read()
    return payload
Guido van Rossumd8faa362007-04-27 19:54:29 +00001475
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Return file-like object for 'name'.

    name is a string for the file name within the ZIP file, or a ZipInfo
    object.

    mode should be 'r' to read a file already in the ZIP file, or 'w' to
    write to a file newly added to the archive.

    pwd is the password to decrypt files (only used for reading).

    When writing, if the file size is not known in advance but may exceed
    2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
    files.  If the size is known in advance, it is best to pass a ZipInfo
    instance for name, with zinfo.file_size set.
    """
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    if pwd:
        if not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if mode == "w":
            raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    writing = (mode == 'w')

    # Make sure we have an info object
    if isinstance(name, ZipInfo):
        # 'name' is already an info object
        zinfo = name
    elif writing:
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
    else:
        # Get info object for name
        zinfo = self.getinfo(name)

    if writing:
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                         "is an open writing handle on it. "
                         "Close the writing handle before trying to read.")

    # Open for reading:
    self._fileRefCnt += 1
    zef_file = _SharedFile(self.fp, zinfo.header_offset,
                           self._fpclose, self._lock, lambda: self._writing)
    try:
        # Skip the file header:
        raw_header = zef_file.read(sizeFileHeader)
        if len(raw_header) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, raw_header)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        extra_length = fheader[_FH_EXTRA_FIELD_LENGTH]
        if extra_length:
            # Consume the extra field; its contents are not used here.
            zef_file.read(extra_length)

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        # Flag bit 0x800 marks a UTF-8 filename; otherwise cp437 is used.
        name_encoding = "utf-8" if zinfo.flag_bits & 0x800 else "cp437"
        fname_str = fname.decode(name_encoding)

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        if zinfo.flag_bits & 0x1:
            # Fall back to the archive-wide default password.
            pwd = pwd or self.pwd
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)
        else:
            pwd = None

        return ZipExtFile(zef_file, mode, zinfo, pwd, True)
    except BaseException:
        # Close the shared handle on any failure, then propagate the
        # original exception unchanged.
        zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001573
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001574 def _open_to_write(self, zinfo, force_zip64=False):
1575 if force_zip64 and not self._allowZip64:
1576 raise ValueError(
1577 "force_zip64 is True, but allowZip64 was False when opening "
1578 "the ZIP file."
1579 )
1580 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001581 raise ValueError("Can't write to the ZIP file while there is "
1582 "another write handle open on it. "
1583 "Close the first handle before opening another.")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001584
1585 # Sizes and CRC are overwritten with correct data after processing the file
1586 if not hasattr(zinfo, 'file_size'):
1587 zinfo.file_size = 0
1588 zinfo.compress_size = 0
1589 zinfo.CRC = 0
1590
1591 zinfo.flag_bits = 0x00
1592 if zinfo.compress_type == ZIP_LZMA:
1593 # Compressed data includes an end-of-stream (EOS) marker
1594 zinfo.flag_bits |= 0x02
1595 if not self._seekable:
1596 zinfo.flag_bits |= 0x08
1597
1598 if not zinfo.external_attr:
1599 zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
1600
1601 # Compressed size can be larger than uncompressed size
1602 zip64 = self._allowZip64 and \
1603 (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1604
1605 if self._seekable:
1606 self.fp.seek(self.start_dir)
1607 zinfo.header_offset = self.fp.tell()
1608
1609 self._writecheck(zinfo)
1610 self._didModify = True
1611
1612 self.fp.write(zinfo.FileHeader(zip64))
1613
1614 self._writing = True
1615 return _ZipWriteFile(self, zinfo, zip64)
1616
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
    using its full name.  Its file information is extracted as accurately
    as possible.  `member' may be a filename or a ZipInfo object.  You can
    specify a different directory using `path'.

    Returns whatever self._extract_member() returns.
    """
    destination = os.getcwd() if path is None else os.fspath(path)
    return self._extract_member(member, destination, pwd)
1629
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
    directory.  `path' specifies a different directory to extract to.
    `members' is optional and must be a subset of the list returned
    by namelist().
    """
    selected = self.namelist() if members is None else members
    destination = os.getcwd() if path is None else os.fspath(path)

    for member in selected:
        self._extract_member(member, destination, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001646
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001647 @classmethod
1648 def _sanitize_windows_name(cls, arcname, pathsep):
1649 """Replace bad characters and remove trailing dots from parts."""
1650 table = cls._windows_illegal_name_trans_table
1651 if not table:
1652 illegal = ':<>|"?*'
1653 table = str.maketrans(illegal, '_' * len(illegal))
1654 cls._windows_illegal_name_trans_table = table
1655 arcname = arcname.translate(table)
1656 # remove trailing dots
1657 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1658 # rejoin, removing empty parts.
1659 arcname = pathsep.join(x for x in arcname if x)
1660 return arcname
1661
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
    file on the path targetpath.

    'member' may also be a name, which is resolved with self.getinfo().
    Returns the normalized path that was created or written.
    """
    if not isinstance(member, ZipInfo):
        member = self.getinfo(member)

    sep = os.path.sep

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, sep)

    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    rejected = ('', os.path.curdir, os.path.pardir)
    kept = [part for part in arcname.split(sep) if part not in rejected]
    arcname = sep.join(kept)
    if sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, sep)

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if member.is_dir():
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    with self.open(member, pwd=pwd) as source, \
         open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1703
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001704 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001705 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001706 if zinfo.filename in self.NameToInfo:
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001707 import warnings
1708 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001709 if self.mode not in ('w', 'x', 'a'):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001710 raise ValueError("write() requires mode 'w', 'x', or 'a'")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001711 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001712 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001713 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001714 _check_compression(zinfo.compress_type)
Serhiy Storchakacfbb3942014-09-23 21:34:24 +03001715 if not self._allowZip64:
1716 requires_zip64 = None
1717 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1718 requires_zip64 = "Files count"
1719 elif zinfo.file_size > ZIP64_LIMIT:
1720 requires_zip64 = "Filesize"
1721 elif zinfo.header_offset > ZIP64_LIMIT:
1722 requires_zip64 = "Zipfile size"
1723 if requires_zip64:
1724 raise LargeZipFile(requires_zip64 +
1725 " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001726
def write(self, filename, arcname=None,
          compress_type=None, compresslevel=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    compress_type and compresslevel override the archive defaults
    (self.compression / self.compresslevel) for a regular file.  A
    directory gets a header-only entry with no data.

    Raises ValueError when the archive is closed or a write handle is
    already open.
    """
    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname,
                              strict_timestamps=self._strict_timestamps)

    # NOTE(review): branch nesting is reconstructed from a listing without
    # indentation; the compression settings are taken to apply only to
    # regular files.
    if zinfo.is_dir():
        zinfo.compress_size = 0
        zinfo.CRC = 0
        with self._lock:
            if self._seekable:
                self.fp.seek(self.start_dir)
            zinfo.header_offset = self.fp.tell()  # Start of header bytes
            if zinfo.compress_type == ZIP_LZMA:
                # Compressed data includes an end-of-stream (EOS) marker
                zinfo.flag_bits |= 0x02

            self._writecheck(zinfo)
            self._didModify = True

            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            self.fp.write(zinfo.FileHeader(False))
            # The central directory will start right after this header.
            self.start_dir = self.fp.tell()
        return

    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    if compresslevel is None:
        zinfo._compresslevel = self.compresslevel
    else:
        zinfo._compresslevel = compresslevel

    with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
        shutil.copyfileobj(src, dest, 1024*8)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001775
def writestr(self, zinfo_or_arcname, data,
             compress_type=None, compresslevel=None):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When a name is given, a ZipInfo is created with the current local
    time, the archive's default compression settings, and directory or
    regular-file attributes depending on whether the name ends in '/'.
    compress_type and compresslevel, when not None, override the values
    on the ZipInfo.

    Raises ValueError when the archive is closed or a write handle is
    already open.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
        # endswith() rather than filename[-1]: indexing raises IndexError
        # when the stored name is empty.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    if compresslevel is not None:
        zinfo._compresslevel = compresslevel

    zinfo.file_size = len(data)            # Uncompressed size
    with self._lock:
        with self.open(zinfo, mode='w') as dest:
            dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001816
1817 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001818 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001819 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001820
def close(self):
    """Close the file, and for mode 'w', 'x' and 'a' write the ending
    records.

    Does nothing when the archive is already closed.  Raises ValueError
    while a write handle is still open.  The underlying file is released
    through self._fpclose() even if writing the end records fails.
    """
    if self.fp is None:
        return

    if self._writing:
        raise ValueError("Can't close the ZIP file while there is "
                         "an open writing handle on it. "
                         "Close the writing handle before closing the zip.")

    try:
        writable = self.mode in ('w', 'x', 'a')
        if writable and self._didModify:  # write ending records
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        # Detach the file object first so the archive reads as closed,
        # then hand it to the reference-counted closer.
        handle, self.fp = self.fp, None
        self._fpclose(handle)
1842
def _write_end_record(self):
    """Write the central directory and the end-of-archive records.

    Emits one central directory entry per ZipInfo in self.filelist at the
    current position of self.fp, followed by the ZIP64 end records when
    a count, offset or size exceeds the classic limits, then the regular
    end-of-archive record and the archive comment.  Flushes self.fp.

    Raises LargeZipFile when ZIP64 records are needed but
    self._allowZip64 is false.
    """
    for zinfo in self.filelist:         # write central directory
        stamp = zinfo.date_time
        dosdate = ((stamp[0] - 1980) << 9) | (stamp[1] << 5) | stamp[2]
        dostime = (stamp[3] << 11) | (stamp[4] << 5) | (stamp[5] // 2)

        # Values too large for the 32-bit header fields are moved to a
        # ZIP64 extra field and replaced by 0xffffffff in the header.
        zip64_values = []
        if (zinfo.file_size > ZIP64_LIMIT
                or zinfo.compress_size > ZIP64_LIMIT):
            zip64_values.append(zinfo.file_size)
            zip64_values.append(zinfo.compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
        else:
            file_size = zinfo.file_size
            compress_size = zinfo.compress_size

        if zinfo.header_offset > ZIP64_LIMIT:
            zip64_values.append(zinfo.header_offset)
            header_offset = 0xffffffff
        else:
            header_offset = zinfo.header_offset

        extra_data = zinfo.extra
        min_version = 0
        if zip64_values:
            # Append a ZIP64 field to the extra's
            extra_data = _strip_extra(extra_data, (1,))
            count = len(zip64_values)
            zip64_field = struct.pack(
                '<HH' + 'Q'*count,
                1, 8*count, *zip64_values)
            extra_data = zip64_field + extra_data

            min_version = ZIP64_VERSION

        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version,
                                  zinfo.reserved, flag_bits,
                                  zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data),
                                  len(zinfo.comment),
                                  0, zinfo.internal_attr,
                                  zinfo.external_attr,
                                  header_offset)
        except DeprecationWarning:
            # Dump the values that were being packed, then re-raise.
            print((structCentralDir, stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset), file=sys.stderr)
            raise
        self.fp.write(centdir)
        self.fp.write(filename)
        self.fp.write(extra_data)
        self.fp.write(zinfo.comment)

    pos2 = self.fp.tell()
    # Write end-of-zip-archive record
    centDirCount = len(self.filelist)
    centDirSize = pos2 - self.start_dir
    centDirOffset = self.start_dir

    if centDirCount > ZIP_FILECOUNT_LIMIT:
        requires_zip64 = "Files count"
    elif centDirOffset > ZIP64_LIMIT:
        requires_zip64 = "Central directory offset"
    elif centDirSize > ZIP64_LIMIT:
        requires_zip64 = "Central directory size"
    else:
        requires_zip64 = None

    if requires_zip64:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
        zip64endrec = struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, centDirCount, centDirCount,
            centDirSize, centDirOffset)
        self.fp.write(zip64endrec)

        # The locator stores pos2, the position where the ZIP64 end
        # record above was written.
        zip64locrec = struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, pos2, 1)
        self.fp.write(zip64locrec)
        # Clamp the values stored in the classic end record.
        centDirCount = min(centDirCount, 0xFFFF)
        centDirSize = min(centDirSize, 0xFFFFFFFF)
        centDirOffset = min(centDirOffset, 0xFFFFFFFF)

    endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, centDirCount, centDirCount,
                         centDirSize, centDirOffset, len(self._comment))
    self.fp.write(endrec)
    self.fp.write(self._comment)
    self.fp.flush()
1944
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001945 def _fpclose(self, fp):
1946 assert self._fileRefCnt > 0
1947 self._fileRefCnt -= 1
1948 if not self._fileRefCnt and not self._filePassed:
1949 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001950
1951
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember the optimization level.

        optimize is -1 (legacy mode: reuse whatever up-to-date bytecode file
        is present) or 0, 1, 2 (use bytecode of exactly that optimization
        level, compiling it if needed).
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.
        If filterfunc(pathname) is given, it is called with every argument.
        When it is False, the file or directory is skipped.

        Raises RuntimeError if pathname is neither a directory nor a
        path ending in ".py".
        """
        pathname = os.fspath(pathname)
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s %r skipped by filterfunc' % (label, pathname))
            return

        if not os.path.isdir(pathname):
            # Single module.  filterfunc was already consulted above.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            self._add_module(pathname, basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in sorted(os.listdir(pathname)):
                if os.path.splitext(filename)[1] == ".py":
                    self._add_module(os.path.join(pathname, filename),
                                     basename, "Adding", filterfunc)
            return

        # This is a package directory, add it
        name = os.path.split(pathname)[1]
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        # __init__.py is always added; filterfunc is not applied to it.
        self._add_module(initname, basename, "Adding")
        dirlist = sorted(os.listdir(pathname))
        dirlist.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename,
                                 filterfunc=filterfunc)  # Recursive call
            elif os.path.splitext(filename)[1] == ".py":
                self._add_module(path, basename, "Adding", filterfunc)

    def _add_module(self, path_py, basename, label, filterfunc=None):
        """Write the bytecode for one "*.py" file into the archive.

        path_py must end in ".py".  If filterfunc is given and rejects
        path_py the module is skipped.  label is the prefix of the debug
        message printed before the archive name.
        """
        if filterfunc and not filterfunc(path_py):
            if self.debug:
                print('file %r skipped by filterfunc' % path_py)
            return
        fname, arcname = self._get_codename(path_py[0:-3], basename)
        if self.debug:
            print(label, arcname)
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc)
        when a legacy string.pyc is up to date.  When bytecode is taken
        from __pycache__, filename is that cache file while archivename
        keeps the legacy "string.pyc" form.  If compilation fails, the
        ".py" source itself is returned for both.

        Raises ValueError if the optimize level given to the constructor
        is not one of -1, 0, 1 or 2.
        """
        def _compile(file, optimize=-1):
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)

        def _is_fresh(candidate):
            # A bytecode file is usable when it exists and is not older
            # than the source.  The source is only stat'ed if the
            # candidate exists.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        # Bytecode is always stored under the legacy pyc file name.
        arcname = file_pyc
        if self._optimize == -1:
            # legacy mode: use whatever file is present, preferring the
            # legacy .pyc, then __pycache__ files of increasing level.
            for candidate in (file_pyc, pycache_opt0,
                              pycache_opt1, pycache_opt2):
                if _is_fresh(candidate):
                    fname = candidate
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    if sys.flags.optimize == 0:
                        fname = pycache_opt0
                    elif sys.flags.optimize == 1:
                        fname = pycache_opt1
                    else:
                        fname = pycache_opt2
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            for level, cached in ((0, pycache_opt0),
                                  (1, pycache_opt1),
                                  (2, pycache_opt2)):
                if self._optimize == level:
                    fname = cached
                    break
            else:
                msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                raise ValueError(msg)
            if not _is_fresh(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002124
2125
def _unique_everseen(iterable, key=None):
    """Yield the elements of iterable in order, dropping repeats.

    Every element (or key(element) when key is given) already yielded is
    remembered, so a later duplicate is skipped wherever it appears.
    """
    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
    # unique_everseen('ABBCcAD', str.lower) --> A B C D
    observed = set()
    if key is None:
        for item in iterable:
            if item not in observed:
                observed.add(item)
                yield item
        return
    for item in iterable:
        marker = key(item)
        if marker in observed:
            continue
        observed.add(marker)
        yield item
2142
2143
def _parents(path):
    """
    Given a path with elements separated by
    posixpath.sep, generate all parents of that path.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    lineage = _ancestry(path)
    # The first element of the ancestry is the path itself; skip it.
    return itertools.islice(lineage, 1, None)
2161
2162
def _ancestry(path):
    """
    Given a path with elements separated by
    posixpath.sep, generate all elements of that path

    >>> list(_ancestry('b/d'))
    ['b/d', 'b']
    >>> list(_ancestry('/b/d/'))
    ['/b/d', '/b']
    >>> list(_ancestry('b/d/f/'))
    ['b/d/f', 'b/d', 'b']
    >>> list(_ancestry('b'))
    ['b']
    >>> list(_ancestry(''))
    []
    """
    current = path.rstrip(posixpath.sep)
    # Walk upwards one component at a time until nothing (or only the
    # root separator) is left.
    while current and current != posixpath.sep:
        yield current
        current = posixpath.dirname(current)
2183
2184
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """Wrap root (a ZipFile, or anything ZipFile() accepts) at entry at.

        at is the entry name inside the archive; "" denotes the archive
        root and a trailing "/" denotes a directory.
        """
        self.root = root if isinstance(root, ZipFile) else ZipFile(root)
        self.at = at

    @property
    def open(self):
        """Callable that opens this entry via the underlying ZipFile.open."""
        return functools.partial(self.root.open, self.at)

    @property
    def name(self):
        """Final path component, without any trailing slash."""
        return posixpath.basename(self.at.rstrip("/"))

    def read_text(self, *args, **kwargs):
        """Return the entry decoded as text.

        Positional and keyword arguments are passed to io.TextIOWrapper.
        """
        with self.open() as strm:
            return io.TextIOWrapper(strm, *args, **kwargs).read()

    def read_bytes(self):
        """Return the raw bytes of the entry."""
        with self.open() as strm:
            return strm.read()

    def _is_child(self, path):
        """Return True if path is a direct child of this directory."""
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        """Return a Path for entry at within the same archive."""
        return Path(self.root, at)

    def is_dir(self):
        """Return True for the archive root and for names ending in "/"."""
        return not self.at or self.at.endswith("/")

    def is_file(self):
        """Return True if the entry exists in the archive and is not a directory.

        A name that is absent from the archive is not a file, matching
        pathlib's behaviour for missing paths.
        """
        return self.exists() and not self.is_dir()

    def exists(self):
        """Return True if the entry is present (explicitly or as an implied dir)."""
        return self.at in self._names()

    def iterdir(self):
        """Iterate over the direct children of this directory.

        Raises ValueError when called on something that is not a directory.
        """
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self._names())
        return filter(self._is_child, subs)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, add):
        """Return the child Path named add.

        The directory form (with trailing "/") is chosen only when the
        archive has that directory and no plain entry of the same name.
        """
        next = posixpath.join(self.at, add)
        next_dir = posixpath.join(self.at, add, "")
        names = self._names()
        return self._next(next_dir if next not in names and next_dir in names else next)

    __truediv__ = joinpath

    @staticmethod
    def _implied_dirs(names):
        """Generate directory names implied by names but not listed in it.

        Each missing ancestor directory is produced once, with a trailing
        "/", in first-seen order.
        """
        # Build the lookup set once: testing against the list itself made
        # this quadratic in the number of archive members.
        known = set(names)
        return _unique_everseen(
            parent + "/"
            for name in names
            for parent in _parents(name)
            if parent + "/" not in known
        )

    @classmethod
    def _add_implied_dirs(cls, names):
        """Return names followed by every implied directory entry."""
        return names + list(cls._implied_dirs(names))

    @property
    def parent(self):
        """Path of the containing directory ("" for top-level entries)."""
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)

    def _names(self):
        """All archive member names plus implied directories."""
        return self._add_implied_dirs(self.root.namelist())
2329
2330
def main(args=None):
    """Command-line interface: list, extract, create or test a zipfile.

    args is the argument list handed to argparse (None means sys.argv).
    Exactly one of -l, -e, -c or -t must be supplied.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for zipfile module.')
    modes = parser.add_mutually_exclusive_group(required=True)
    modes.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    modes.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    modes.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    modes.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    args = parser.parse_args(args)

    if args.test is not None:
        with ZipFile(args.test, 'r') as archive:
            badfile = archive.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")
        return

    if args.list is not None:
        with ZipFile(args.list, 'r') as archive:
            archive.printdir()
        return

    if args.extract is not None:
        source, target_dir = args.extract
        with ZipFile(source, 'r') as archive:
            archive.extractall(target_dir)
        return

    if args.create is None:
        return

    zip_name, *sources = args.create

    def add_tree(archive, fs_path, arc_path):
        # Regular files are deflated; directories get an entry (unless
        # they map to the archive root) and are then walked in sorted
        # order.  Anything else is ignored.
        if os.path.isfile(fs_path):
            archive.write(fs_path, arc_path, ZIP_DEFLATED)
            return
        if not os.path.isdir(fs_path):
            return
        if arc_path:
            archive.write(fs_path, arc_path)
        for entry in sorted(os.listdir(fs_path)):
            add_tree(archive,
                     os.path.join(fs_path, entry),
                     os.path.join(arc_path, entry))

    with ZipFile(zip_name, 'w') as archive:
        for source in sources:
            arc_path = os.path.basename(source)
            if not arc_path:
                # Trailing separator: fall back to the directory's own name.
                arc_path = os.path.basename(os.path.dirname(source))
            if arc_path in ('', os.curdir, os.pardir):
                arc_path = ''
            add_tree(archive, source, arc_path)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002390
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()