blob: dfd090795019622a84d3cbc04734bff6ec0e8118 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04006import binascii
7import functools
8import importlib.util
Antoine Pitroua32f9a22010-01-27 21:18:57 +00009import io
Miss Islington (bot)c410f382019-08-24 09:03:52 -070010import itertools
Barry Warsaw28a691b2010-04-17 00:19:56 +000011import os
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040012import posixpath
Barry Warsaw28a691b2010-04-17 00:19:56 +000013import shutil
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040014import stat
Barry Warsaw28a691b2010-04-17 00:19:56 +000015import struct
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040016import sys
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020017import threading
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040018import time
Guido van Rossum32abe6f2000-03-31 17:30:02 +000019
20try:
Tim Peterse1190062001-01-15 03:34:38 +000021 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000022 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040023except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000025 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000026
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020027try:
28 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040029except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020030 bz2 = None
31
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020032try:
33 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040034except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020035 lzma = None
36
# Names exported by "from zipfile import *".  "BadZipfile" and "error" are
# older spellings that this module binds to the BadZipFile class.
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000040
class BadZipFile(Exception):
    """Raised when an archive is corrupt or uses an unsupported ZIP feature."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +000043
44
class LargeZipFile(Exception):
    """Raised while writing when ZIP64 is required but has been disabled.

    The archive (or one of its members) needs the ZIP64 extensions, and
    the caller did not allow them.
    """
50
error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names


# Sizes above ZIP64_LIMIT make ZipInfo.FileHeader() fall back to (or demand)
# the ZIP64 extra field.
ZIP64_LIMIT = (1 << 31) - 1
# Largest value that fits in an unsigned 16-bit field (the width used for
# the entry counts and the comment length in the end-of-central-directory
# record).
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "Version needed to extract" values written into headers; FileHeader()
# raises a member's version to the minimum required by the features it uses.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020071
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)
#
# Every format string starts with "<": little-endian, standard sizes, no
# alignment padding.  The _XXX_* constants are indices into the tuple that
# struct.unpack() returns for the corresponding format.

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData() appends the comment bytes and the record's file offset).
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

# NOTE(review): presumably the signature of the optional "data descriptor"
# record (little-endian bytes b"PK\x07\x08") -- not used in this part of the
# file, confirm against the writer code.
_DD_SIGNATURE = 0x08074b50
166
Serhiy Storchaka9bdb7be2018-09-17 15:36:40 +0300167_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
168
169def _strip_extra(extra, xids):
170 # Remove Extra Fields with specified IDs.
171 unpack = _EXTRA_FIELD_STRUCT.unpack
172 modified = False
173 buffer = []
174 start = i = 0
175 while i + 4 <= len(extra):
176 xid, xlen = unpack(extra[i : i + 4])
177 j = i + 4 + xlen
178 if xid in xids:
179 if i != start:
180 buffer.append(extra[start : i])
181 start = j
182 modified = True
183 i = j
184 if not modified:
185 return extra
186 return b''.join(buffer)
187
def _check_zipfile(fp):
    """Return True if *fp* holds an "end of central directory" record.

    *fp* is handed to _EndRecData(); an OSError raised while probing is
    treated as "not a ZIP file".
    """
    try:
        endrec = _EndRecData(fp)
    except OSError:
        return False
    # A truthy record means the magic number was found.
    return bool(endrec)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000195
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000196def is_zipfile(filename):
197 """Quickly see if a file is a ZIP file by checking the magic number.
198
199 The filename argument may be a file or file-like object too.
200 """
201 result = False
202 try:
203 if hasattr(filename, "read"):
204 result = _check_zipfile(fp=filename)
205 else:
206 with open(filename, "rb") as fp:
207 result = _check_zipfile(fp)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200208 except OSError:
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000209 pass
210 return result
211
def _EndRecData64(fpin, offset, endrec):
    """Overlay ZIP64 end-of-archive data onto *endrec* when it is present.

    *offset* is the position of the classic end-of-central-directory
    record relative to the end of the file (a negative number).  The ZIP64
    locator is expected immediately before that record and the ZIP64
    record immediately before the locator.  *endrec* is updated in place
    and returned; it is returned untouched if either structure is missing.

    Raises BadZipFile for archives that span multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator,
                                               locator)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    fields = struct.unpack(structEndArchive64, record)
    if fields[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = fields[_CD64_SIGNATURE]
    endrec[_ECD_DISK_NUMBER] = fields[_CD64_DISK_NUMBER]
    endrec[_ECD_DISK_START] = fields[_CD64_DISK_NUMBER_START]
    endrec[_ECD_ENTRIES_THIS_DISK] = fields[_CD64_NUMBER_ENTRIES_THIS_DISK]
    endrec[_ECD_ENTRIES_TOTAL] = fields[_CD64_NUMBER_ENTRIES_TOTAL]
    endrec[_ECD_SIZE] = fields[_CD64_DIRECTORY_SIZE]
    endrec[_ECD_OFFSET] = fields[_CD64_OFFSET_START_CENTDIR]
    return endrec
253
254
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list: the fields of the ZIP "End of central dir"
    record, then the archive comment (bytes), then the file offset at
    which the record starts.  ZIP64 data, when present, is merged in by
    _EndRecData64()."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: an archive without a comment ends with the record itself.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    bare_record = (len(tail) == sizeEndCentDir
                   and tail[0:4] == stringEndArchive
                   and tail[-2:] == b"\000\000")
    if bare_record:
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, tail))

        # Append a blank comment and record start offset
        endrec += [b"", filesize - sizeEndCentDir]

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    window = fpin.read()
    start = window.rfind(stringEndArchive)
    if start < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    record_end = start + sizeEndCentDir
    recData = window[start:record_end]
    if len(recData) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, recData))
    commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    endrec += [window[record_end:record_end + commentSize],
               maxCommentStart + start]

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, maxCommentStart + start - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000314
Fred Drake484d7352000-10-02 21:14:52 +0000315
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create a member description with default header values.

        filename is the member name; it is cut at the first NUL character
        and os.sep is replaced by "/".  date_time is a
        (year, month, day, hour, minute, second) sequence.

        Raises ValueError if the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Return a compact description listing only non-default details."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The upper 16 bits of external_attr hold the file mode stored by
        # from_file(); the lower 16 bits hold the remaining attribute bits.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        zip64 selects whether a ZIP64 extra field is emitted: None decides
        from the sizes, True always emits it, False never does.

        As a side effect extract_version and create_version are raised to
        the minimum required by ZIP64 and by the compression method.

        Raises LargeZipFile if a size exceeds ZIP64_LIMIT while zip64 is
        false.
        """
        dt = self.date_time
        # Pack the timestamp into the two 16-bit MS-DOS date/time fields;
        # seconds are stored with two-second resolution.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        ASCII is used when possible; otherwise the name is encoded as
        UTF-8 and flag bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 record (ID 0x0001) found in self.extra.

        file_size, compress_size and header_offset are replaced, in that
        order, by successive 64-bit values from the record, but only for
        the attributes that currently hold the "see ZIP64 field" sentinel.
        Records with other IDs are skipped.

        Raises BadZipFile if a record is truncated, has an unexpected
        size, or does not contain a value for every sentinel attribute.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        self.file_size = counts[idx]
                        idx += 1

                    if self.compress_size == 0xFFFFFFFF:
                        self.compress_size = counts[idx]
                        idx += 1

                    if self.header_offset == 0xffffffff:
                        self.header_offset = counts[idx]
                        idx += 1
                except IndexError:
                    # The record is shorter than the number of sentinel
                    # fields it has to supply: report a corrupt archive
                    # instead of leaking a bare IndexError.
                    raise BadZipFile(
                        "Corrupt extra field %04x (size=%d)" % (tp, ln)) from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).

        When strict_timestamps is false, a modification year before 1980 is
        replaced by 1980-01-01 00:00:00 and a year after 2107 by
        2107-12-31 23:59:59.
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        # endswith() rather than [-1] so that an empty name answers False
        # instead of raising IndexError.
        return self.filename.endswith('/')
538
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000539
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300540# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
541# internal keys. We noticed that a direct implementation is faster than
542# relying on binascii.crc32().
Thomas Wouterscf297e42007-02-23 15:07:44 +0000543
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300544_crctable = None
545def _gen_crc(crc):
546 for j in range(8):
547 if crc & 1:
548 crc = (crc >> 1) ^ 0xEDB88320
549 else:
550 crc >>= 1
551 return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000552
# ZIP supports a password-based form of encryption. Even though known
# plaintext attacks have been found against it, it is still useful
# to be able to get data out of such a file.
#
# Usage:
#     zd = _ZipDecrypter(mypwd)
#     plain_bytes = zd(cypher_bytes)

def _ZipDecrypter(pwd):
    """Return a function that decrypts successive chunks of bytes.

    *pwd* is iterated as integers (e.g. a bytes object) to seed the three
    cipher keys.  The returned callable keeps that key state between
    calls, so chunks must be fed in stream order.
    """
    global _crctable
    if _crctable is None:
        _crctable = [_gen_crc(n) for n in range(256)]
    table = _crctable

    key0, key1, key2 = 305419896, 591751049, 878082192

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ table[(crc ^ ch) & 0xFF]

    def update_keys(c):
        """Mix one plaintext byte into the three keys."""
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        # Both steps are arithmetic modulo 2**32, so a single final mask
        # gives the same value as masking after each step.
        key1 = ((key1 + (key0 & 0xFF)) * 134775813 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    for p in pwd:
        update_keys(p)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        for byte in data:
            k = key2 | 2
            byte ^= ((k * (k ^ 1)) >> 8) & 0xFF
            update_keys(byte)
            plain.append(byte)
        return bytes(plain)

    return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000597
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200598
class LZMACompressor:
    """Raw LZMA1 compressor that prefixes its output with a small header.

    The header (two bytes 9 and 4, the 16-bit little-endian length of the
    filter properties, then the properties themselves) is emitted once, in
    front of the first data returned by compress() or flush().
    """

    def __init__(self):
        # The underlying compressor is created lazily by _init().
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*, returning whatever output is available."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream and return the remaining output."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.flush()
620
621
class LZMADecompressor:
    """Decompressor for the header-prefixed raw LZMA1 stream used in ZIP.

    Input is buffered until the 4-byte header and the filter properties
    it announces have arrived (plus at least one more byte); only then is
    the underlying lzma decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Feed *data* and return the bytes decompressed so far."""
        if self._decomp is None:
            pending = self._unconsumed + data
            self._unconsumed = pending
            if len(pending) <= 4:
                return b''
            # Bytes 2-3 of the header give the length of the properties.
            psize, = struct.unpack('<H', pending[2:4])
            payload_at = 4 + psize
            if len(pending) <= payload_at:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:payload_at])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[payload_at:]
            # The buffer is no longer needed once the header is consumed.
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
648
649
# Human-readable names keyed by ZIP compression-method number.  Used by
# ZipInfo.__repr__(), which falls back to the raw number for methods that
# are not listed.  The table covers more methods than this module can
# actually compress or decompress.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
669
def _check_compression(compression):
    """Validate that *compression* can be used in this interpreter.

    Returns None when the method is ZIP_STORED or when the module backing
    the method was imported successfully.

    Raises RuntimeError if the backing module (zlib, bz2 or lzma) is
    missing, and NotImplementedError for any other method number.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        return
    if compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
        return
    if compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
        return
    raise NotImplementedError("That compression method is not supported")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200687
688
def _get_compressor(compress_type, compresslevel=None):
    """Return a fresh compressor object for *compress_type*.

    *compresslevel* is forwarded to the zlib and bz2 compressors when it is
    not None; otherwise each library's default level is used.  Returns None
    for any method other than deflate, bzip2 or lzma.
    """
    if compress_type == ZIP_DEFLATED:
        if compresslevel is None:
            level = zlib.Z_DEFAULT_COMPRESSION
        else:
            level = compresslevel
        # Negative window bits: raw deflate stream without a zlib header.
        return zlib.compressobj(level, zlib.DEFLATED, -15)

    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)

    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()

    return None
703
704
def _get_decompressor(compress_type):
    """Return a fresh decompressor object for *compress_type*.

    Stored members need no decompressor, so None is returned for them.
    Raises NotImplementedError for any method other than stored, deflate,
    bzip2 or lzma; the message includes the method's name when it is
    listed in ``compressor_names``.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # Negative window bits: raw deflate stream without a zlib header.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    method_name = compressor_names.get(compress_type)
    if method_name:
        message = "compression type %d (%s)" % (compress_type, method_name)
    else:
        message = "compression type %d" % (compress_type,)
    raise NotImplementedError(message)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200720
721
class _SharedFile:
    """A private read position on a file object shared by several readers.

    Each instance remembers its own offset (``_pos``) into the shared file
    and re-seeks to it under ``lock`` before every read, so that several
    handles can interleave reads on the same underlying file.

    file:    the shared underlying file object.
    pos:     initial position of this handle.
    close:   callable invoked with the file object when this handle closes.
    lock:    context manager guarding access to the shared file.
    writing: zero-argument callable; true while a writing handle is open.
    """

    def __init__(self, file, pos, close, lock, writing):
        self._file = file
        self._pos = pos
        self._close = close
        self._lock = lock
        self._writing = writing
        self.seekable = file.seekable

    def tell(self):
        """Return this handle's own position.

        The underlying file's position is not used because another handle
        sharing the file may have moved it since our last operation.
        """
        return self._pos

    def seek(self, offset, whence=0):
        """Move this handle to a new position and return it.

        Raises ValueError while a writing handle is open on the archive.
        """
        with self._lock:
            if self._writing():
                raise ValueError("Can't reposition in the ZIP file while "
                        "there is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            if whence == os.SEEK_CUR:
                # "Current" means this handle's position, not wherever the
                # shared file happens to be.
                self._file.seek(self._pos + offset)
            else:
                self._file.seek(offset, whence)
            self._pos = self._file.tell()
            return self._pos

    def read(self, n=-1):
        """Read up to *n* bytes starting at this handle's position.

        Raises ValueError while a writing handle is open on the archive.
        """
        with self._lock:
            if self._writing():
                raise ValueError("Can't read from the ZIP file while there "
                        "is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            self._file.seek(self._pos)
            data = self._file.read(n)
            self._pos = self._file.tell()
            return data

    def close(self):
        """Release the shared file through the ``close`` callback (once)."""
        if self._file is not None:
            fileobj = self._file
            self._file = None
            self._close(fileobj)
758
# Provide the tell method for unseekable stream
class _Tellable:
    """Write-only wrapper that tracks how many bytes have been written.

    ``tell()`` returns the running total of the values returned by the
    wrapped object's ``write()``, starting from zero.
    """

    def __init__(self, fp):
        self.fp = fp
        self.offset = 0

    def write(self, data):
        """Write *data* to the wrapped object and return the count it reports."""
        written = self.fp.write(data)
        self.offset += written
        return written

    def tell(self):
        """Return the number of bytes written through this wrapper."""
        return self.offset

    def flush(self):
        """Flush the wrapped object."""
        self.fp.flush()

    def close(self):
        """Close the wrapped object."""
        self.fp.close()
778
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200779
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # Parenthesised on purpose: "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Wrap *fileobj*, positioned at the start of the member's data.

        fileobj:       object with ``read()``; ``seekable()``/``tell()``/
                       ``seek()`` are used when available.
        mode:          stored as ``self.mode``.
        zipinfo:       supplies compress_type, compress_size, file_size,
                       filename and (optionally) CRC.
        decrypter:     optional callable applied to every chunk read.
        close_fileobj: close *fileobj* when this object is closed.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        # Always initialise the running CRC.  It is only *checked* when a
        # reference value exists, but the seek bookkeeping below reads it
        # unconditionally; leaving it unset used to raise an AttributeError
        # that was swallowed and silently disabled seeking.
        self._running_crc = crc32(b'')
        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
        else:
            self._expected_crc = None

        self._seekable = False
        try:
            if fileobj.seekable():
                # Remember the starting state so seek() can rewind.
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            # fileobj lacks seekable()/tell(): treat it as unseekable.
            pass

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Push the bytes just read back in front of the buffer.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than asked for: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            # Loop only until one underlying read yields some data.
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read (and decrypt) at most n raw bytes of the member's data,
        # rounding small requests up to MIN_READ_SIZE.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            # The file ended before the declared compressed size.
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        return self._seekable

    def seek(self, offset, whence=0):
        """Move to a new position in the uncompressed data and return it.

        The target is clamped to the range [0, file size].  Forward moves
        are performed by reading and discarding data; backward moves that
        fall outside the current buffer restart decoding from the
        beginning of the member.

        Raises io.UnsupportedOperation if the underlying stream is not
        seekable, or if such a restart is needed on an encrypted member
        (the decrypter cannot be returned to its initial state).  Raises
        ValueError for an invalid *whence*.
        """
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            if self._decrypter is not None:
                # Restarting would feed the data through a decrypter whose
                # state has already advanced, yielding garbage.  Fail
                # loudly (before touching any state) instead.
                raise io.UnsupportedOperation(
                    "can't seek backward in an encrypted archive member")
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos

        # Skip forward by reading and discarding, in bounded chunks.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        """Return the current position in the uncompressed data."""
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        # Bytes decoded so far, minus the part still waiting in the buffer.
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos
1076
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001077
class _ZipWriteFile(io.BufferedIOBase):
    """Writable file object for adding one member to an open ZipFile.

    Data passed to write() is checksummed, optionally compressed and
    appended to the archive's file object; close() finalises the member's
    sizes and CRC and registers it with the owning ZipFile.
    """

    def __init__(self, zf, zinfo, zip64):
        """
        zf:    the owning ZipFile; its ``fp`` receives the data.
        zinfo: the member's ZipInfo, updated in place by close().
        zip64: whether the member is written using ZIP64 size fields.
        """
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0
        self._compress_size = 0
        self._crc = 0

    @property
    def _fileobj(self):
        # Looked up on every access rather than cached.
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Write *data* to the member and return the number of bytes taken.

        Accepts any object supporting the buffer protocol.  Raises
        ValueError if the file is already closed.
        """
        if self.closed:
            raise ValueError('I/O operation on closed file.')

        # len() counts items, not bytes, for buffers with multi-byte items
        # (e.g. array('I')), so take the byte count from a memoryview.
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes

        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        """Finish the member and record it in the owning ZipFile.

        Does nothing if already closed.  The owning ZipFile's ``_writing``
        flag is cleared even when finalisation fails.
        """
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'File size unexpectedly exceeded ZIP64 limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'Compressed size unexpectedly exceeded ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            self._zipfile._writing = False
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001153
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001154
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001155
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001156class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001157 """ Class with methods to open, read, write, close, list zip files.
1158
Bo Baylesce237c72018-01-29 23:54:07 -06001159 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1160 compresslevel=None)
Tim Petersa19a1682001-03-29 04:36:09 +00001161
Fred Drake3d9091e2001-03-26 15:49:24 +00001162 file: Either the path to the file, or a file-like object.
1163 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001164 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1165 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001166 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1167 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001168 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1169 needed, otherwise it will raise an exception when this would
1170 be necessary.
Bo Baylesce237c72018-01-29 23:54:07 -06001171 compresslevel: None (default for the given compression type) or an integer
1172 specifying the level to pass to the compressor.
1173 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1174 When using ZIP_DEFLATED integers 0 through 9 are accepted.
1175 When using ZIP_BZIP2 integers 1 through 9 are accepted.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001176
Fred Drake3d9091e2001-03-26 15:49:24 +00001177 """
Fred Drake484d7352000-10-02 21:14:52 +00001178
Fred Drake90eac282001-02-28 05:29:34 +00001179 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001180 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001181
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
             compresslevel=None, *, strict_timestamps=True):
    """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
    or append 'a'."""
    if mode not in ('r', 'w', 'x', 'a'):
        raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.compresslevel = compresslevel
    self.mode = mode
    self.pwd = None
    self._comment = b''
    self._strict_timestamps = strict_timestamps

    # Path-like objects are treated exactly like string filenames.
    if isinstance(file, os.PathLike):
        file = os.fspath(file)

    if not isinstance(file, str):
        # We were handed an already-open file-like object.
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    else:
        # We were handed a filename and must open it ourselves.
        self._filePassed = 0
        self.filename = file
        # Maps a ZipFile mode to the first open() mode to try, and each
        # open() mode to the next one to try if it fails with OSError.
        fallbacks = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                     'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
        open_mode = fallbacks[mode]
        while True:
            try:
                self.fp = io.open(file, open_mode)
                break
            except OSError:
                if open_mode not in fallbacks:
                    raise
                open_mode = fallbacks[open_mode]

    self._fileRefCnt = 1
    self._lock = threading.RLock()
    self._seekable = True
    self._writing = False

    try:
        if mode == 'r':
            self._RealGetContents()
        elif mode in ('w', 'x'):
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
            try:
                self.start_dir = self.fp.tell()
            except (AttributeError, OSError):
                # No usable tell(): count written bytes ourselves.
                self.fp = _Tellable(self.fp)
                self.start_dir = 0
                self._seekable = False
            else:
                # Some file-like objects can provide tell() but not seek()
                try:
                    self.fp.seek(self.start_dir)
                except (AttributeError, OSError):
                    self._seekable = False
        elif mode == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                self.start_dir = self.fp.tell()
        else:
            raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
    except:
        # Release the file on any failure, then let the error propagate.
        opened = self.fp
        self.fp = None
        self._fpclose(opened)
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001271
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is the target."""
    return self
1274
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    Returns None, so an exception raised inside the block propagates.
    """
    self.close()
1277
def __repr__(self):
    """Return a short description: class, file or filename, and mode.

    A closed archive (``fp`` is None) is shown as ``[closed]``.
    """
    cls = self.__class__
    pieces = ['<%s.%s' % (cls.__module__, cls.__qualname__)]
    if self.fp is None:
        pieces.append(' [closed]')
    else:
        if self._filePassed:
            pieces.append(' file=%r' % self.fp)
        elif self.filename is not None:
            pieces.append(' filename=%r' % self.filename)
        pieces.append(' mode=%r' % self.mode)
    pieces.append('>')
    return ''.join(pieces)
1291
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record, then parses every
    central directory entry into a ZipInfo that is appended to
    ``self.filelist`` and indexed in ``self.NameToInfo``.  Also sets
    ``self._comment`` and ``self.start_dir``.

    Raises BadZipFile if the end record is missing, the computed
    directory offset is negative, or an entry is truncated or has a bad
    signature; raises NotImplementedError if an entry requires a newer
    extract version than MAX_EXTRACT_VERSION.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    if self.start_dir < 0:
        # Inconsistent sizes in the end record.  Report it as a bad
        # archive instead of letting seek() fail with an unrelated error
        # that callers catching BadZipFile would miss.
        raise BadZipFile("Bad offset for central directory")
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory from here on.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if len(centdir) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                        t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        # Shift by the amount of data that precedes the archive.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001368
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001369
1370 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001371 """Return a list of file names in the archive."""
Ezio Melotti006917e2012-04-16 21:34:24 -06001372 return [data.filename for data in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001373
1374 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001375 """Return a list of class ZipInfo instances for files in the
1376 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001377 return self.filelist
1378
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001379 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +00001380 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001381 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
1382 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001383 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001384 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001385 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1386 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001387
1388 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001389 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001390 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001391 for zinfo in self.filelist:
1392 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001393 # Read by chunks, to avoid an OverflowError or a
1394 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001395 with self.open(zinfo.filename, "r") as f:
1396 while f.read(chunk_size): # Check CRC-32
1397 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001398 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001399 return zinfo.filename
1400
1401 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +00001402 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001403 info = self.NameToInfo.get(name)
1404 if info is None:
1405 raise KeyError(
1406 'There is no item named %r in the archive' % name)
1407
1408 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001409
Thomas Wouterscf297e42007-02-23 15:07:44 +00001410 def setpassword(self, pwd):
1411 """Set default password for encrypted files."""
R. David Murray8d855d82010-12-21 21:53:37 +00001412 if pwd and not isinstance(pwd, bytes):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001413 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
R. David Murray8d855d82010-12-21 21:53:37 +00001414 if pwd:
1415 self.pwd = pwd
1416 else:
1417 self.pwd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001418
R David Murrayf50b38a2012-04-12 18:44:58 -04001419 @property
1420 def comment(self):
1421 """The comment text associated with the ZIP file."""
1422 return self._comment
1423
1424 @comment.setter
1425 def comment(self, comment):
1426 if not isinstance(comment, bytes):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001427 raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
R David Murrayf50b38a2012-04-12 18:44:58 -04001428 # check for valid comment length
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001429 if len(comment) > ZIP_MAX_COMMENT:
1430 import warnings
1431 warnings.warn('Archive comment is too long; truncating to %d bytes'
1432 % ZIP_MAX_COMMENT, stacklevel=2)
R David Murrayf50b38a2012-04-12 18:44:58 -04001433 comment = comment[:ZIP_MAX_COMMENT]
1434 self._comment = comment
1435 self._didModify = True
1436
Thomas Wouterscf297e42007-02-23 15:07:44 +00001437 def read(self, name, pwd=None):
Serhiy Storchaka4bb186d2018-11-25 09:51:14 +02001438 """Return file bytes for name."""
Benjamin Petersond285bdb2010-10-31 17:57:22 +00001439 with self.open(name, "r", pwd) as fp:
1440 return fp.read()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001441
    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
        """Return file-like object for 'name'.

        name is a string for the file name within the ZIP file, or a ZipInfo
        object.

        mode should be 'r' to read a file already in the ZIP file, or 'w' to
        write to a file newly added to the archive.

        pwd is the password to decrypt files (only used for reading).

        When writing, if the file size is not known in advance but may exceed
        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
        files. If the size is known in advance, it is best to pass a ZipInfo
        instance for name, with zinfo.file_size set.

        Raises ValueError for a mode other than 'r'/'w', for a password given
        together with mode 'w', for a closed archive, or when reading while a
        writing handle is open; TypeError for a non-bytes password;
        BadZipFile when the local file header is truncated, has a bad
        signature, or names a different file than the directory entry;
        NotImplementedError for flag bits 5 and 6; RuntimeError when an
        encrypted member has no password or the password check byte does not
        match.
        """
        if mode not in {"r", "w"}:
            raise ValueError('open() requires mode "r" or "w"')
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd and (mode == "w"):
            raise ValueError("pwd is only supported for reading files")
        if not self.fp:
            raise ValueError(
                "Attempt to use ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        elif mode == 'w':
            # A new member: build its ZipInfo from the archive-wide defaults.
            zinfo = ZipInfo(name)
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        if mode == 'w':
            return self._open_to_write(zinfo, force_zip64=force_zip64)

        if self._writing:
            raise ValueError("Can't read from the ZIP file while there "
                    "is an open writing handle on it. "
                    "Close the writing handle before trying to read.")

        # Open for reading:
        # Count this reader; the count is dropped again through
        # self._fpclose, which is handed to the shared file below.
        self._fileRefCnt += 1
        zef_file = _SharedFile(self.fp, zinfo.header_offset,
                               self._fpclose, self._lock, lambda: self._writing)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                # The extra field is read only to move past it.
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x20:
                # Zip 2.7: compressed patched data
                raise NotImplementedError("compressed patched data (flag bit 5)")

            if zinfo.flag_bits & 0x40:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            if zinfo.flag_bits & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            # The name in the local header must agree with the name recorded
            # in the directory entry.
            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            zd = None
            if is_encrypted:
                # Fall back to the archive-wide default password.
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %r is encrypted, password "
                                       "required for extraction" % name)

                zd = _ZipDecrypter(pwd)
                # The first 12 bytes in the cypher stream is an encryption header
                #  used to strengthen the algorithm. The first 11 bytes are
                #  completely random, while the 12th contains the MSB of the CRC,
                #  or the MSB of the file time depending on the header type
                #  and is used to check the correctness of the password.
                header = zef_file.read(12)
                h = zd(header[0:12])
                if zinfo.flag_bits & 0x8:
                    # compare against the file type from extended local headers
                    check_byte = (zinfo._raw_time >> 8) & 0xff
                else:
                    # compare against the CRC otherwise
                    check_byte = (zinfo.CRC >> 24) & 0xff
                if h[11] != check_byte:
                    raise RuntimeError("Bad password for file %r" % name)

            return ZipExtFile(zef_file, mode, zinfo, zd, True)
        except:
            # Whatever went wrong, close the shared handle before letting the
            # original exception propagate unchanged.
            zef_file.close()
            raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001555
    def _open_to_write(self, zinfo, force_zip64=False):
        """Write the local header for zinfo and return a writing handle.

        zinfo is updated in place (sizes, CRC, flag bits, default
        permissions, header offset).  The archive is marked as modified and
        as having an open writer (self._writing) before the _ZipWriteFile is
        returned.

        Raises ValueError when force_zip64 is requested although the archive
        was opened with ZIP64 disallowed, or when another write handle is
        still open.  Whatever self._writecheck raises propagates as well.
        """
        if force_zip64 and not self._allowZip64:
            raise ValueError(
                "force_zip64 is True, but allowZip64 was False when opening "
                "the ZIP file."
            )
        if self._writing:
            raise ValueError("Can't write to the ZIP file while there is "
                             "another write handle open on it. "
                             "Close the first handle before opening another.")

        # Sizes and CRC are overwritten with correct data after processing the file
        # (a file_size supplied by the caller is kept for the ZIP64 estimate
        # below)
        if not hasattr(zinfo, 'file_size'):
            zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0

        zinfo.flag_bits = 0x00
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02
        if not self._seekable:
            # Flag bit 3 is set for output that cannot be seeked.
            # NOTE(review): per the ZIP format this presumably announces a
            # trailing data descriptor -- confirm against _ZipWriteFile.
            zinfo.flag_bits |= 0x08

        if not zinfo.external_attr:
            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

        # The new member starts at self.start_dir when the stream can seek;
        # otherwise at whatever position the stream reports.
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()

        self._writecheck(zinfo)
        self._didModify = True

        self.fp.write(zinfo.FileHeader(zip64))

        self._writing = True
        return _ZipWriteFile(self, zinfo, zip64)
1598
Christian Heimes790c8232008-01-07 21:14:23 +00001599 def extract(self, member, path=None, pwd=None):
1600 """Extract a member from the archive to the current working directory,
1601 using its full name. Its file information is extracted as accurately
1602 as possible. `member' may be a filename or a ZipInfo object. You can
1603 specify a different directory using `path'.
1604 """
Christian Heimes790c8232008-01-07 21:14:23 +00001605 if path is None:
1606 path = os.getcwd()
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001607 else:
1608 path = os.fspath(path)
Christian Heimes790c8232008-01-07 21:14:23 +00001609
1610 return self._extract_member(member, path, pwd)
1611
1612 def extractall(self, path=None, members=None, pwd=None):
1613 """Extract all members from the archive to the current working
1614 directory. `path' specifies a different directory to extract to.
1615 `members' is optional and must be a subset of the list returned
1616 by namelist().
1617 """
1618 if members is None:
1619 members = self.namelist()
1620
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001621 if path is None:
1622 path = os.getcwd()
1623 else:
1624 path = os.fspath(path)
1625
Christian Heimes790c8232008-01-07 21:14:23 +00001626 for zipinfo in members:
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001627 self._extract_member(zipinfo, path, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001628
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001629 @classmethod
1630 def _sanitize_windows_name(cls, arcname, pathsep):
1631 """Replace bad characters and remove trailing dots from parts."""
1632 table = cls._windows_illegal_name_trans_table
1633 if not table:
1634 illegal = ':<>|"?*'
1635 table = str.maketrans(illegal, '_' * len(illegal))
1636 cls._windows_illegal_name_trans_table = table
1637 arcname = arcname.translate(table)
1638 # remove trailing dots
1639 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1640 # rejoin, removing empty parts.
1641 arcname = pathsep.join(x for x in arcname if x)
1642 return arcname
1643
Christian Heimes790c8232008-01-07 21:14:23 +00001644 def _extract_member(self, member, targetpath, pwd):
1645 """Extract the ZipInfo object 'member' to a physical
1646 file on the path targetpath.
1647 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001648 if not isinstance(member, ZipInfo):
1649 member = self.getinfo(member)
1650
Christian Heimes790c8232008-01-07 21:14:23 +00001651 # build the destination pathname, replacing
1652 # forward slashes to platform specific separators.
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001653 arcname = member.filename.replace('/', os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001654
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001655 if os.path.altsep:
1656 arcname = arcname.replace(os.path.altsep, os.path.sep)
1657 # interpret absolute pathname as relative, remove drive letter or
1658 # UNC path, redundant separators, "." and ".." components.
1659 arcname = os.path.splitdrive(arcname)[1]
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001660 invalid_path_parts = ('', os.path.curdir, os.path.pardir)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001661 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001662 if x not in invalid_path_parts)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001663 if os.path.sep == '\\':
Serhiy Storchakae5e64442013-02-02 19:50:59 +02001664 # filter illegal characters on Windows
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001665 arcname = self._sanitize_windows_name(arcname, os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001666
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001667 targetpath = os.path.join(targetpath, arcname)
Christian Heimes790c8232008-01-07 21:14:23 +00001668 targetpath = os.path.normpath(targetpath)
1669
1670 # Create all upper directories if necessary.
1671 upperdirs = os.path.dirname(targetpath)
1672 if upperdirs and not os.path.exists(upperdirs):
1673 os.makedirs(upperdirs)
1674
Serhiy Storchaka503f9082016-02-08 00:02:25 +02001675 if member.is_dir():
Martin v. Löwis70ccd162009-05-24 19:47:22 +00001676 if not os.path.isdir(targetpath):
1677 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001678 return targetpath
1679
Antoine Pitrou17babc52012-11-17 23:50:08 +01001680 with self.open(member, pwd=pwd) as source, \
1681 open(targetpath, "wb") as target:
1682 shutil.copyfileobj(source, target)
Christian Heimes790c8232008-01-07 21:14:23 +00001683
1684 return targetpath
1685
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001686 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001687 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001688 if zinfo.filename in self.NameToInfo:
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001689 import warnings
1690 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001691 if self.mode not in ('w', 'x', 'a'):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001692 raise ValueError("write() requires mode 'w', 'x', or 'a'")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001693 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001694 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001695 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001696 _check_compression(zinfo.compress_type)
Serhiy Storchakacfbb3942014-09-23 21:34:24 +03001697 if not self._allowZip64:
1698 requires_zip64 = None
1699 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1700 requires_zip64 = "Files count"
1701 elif zinfo.file_size > ZIP64_LIMIT:
1702 requires_zip64 = "Filesize"
1703 elif zinfo.header_offset > ZIP64_LIMIT:
1704 requires_zip64 = "Zipfile size"
1705 if requires_zip64:
1706 raise LargeZipFile(requires_zip64 +
1707 " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001708
    def write(self, filename, arcname=None,
              compress_type=None, compresslevel=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        The ZipInfo for the entry is built with ZipInfo.from_file().  For a
        regular file, compress_type and compresslevel override the
        archive-wide defaults when they are not None.  For a directory only
        a header is written, with no data.

        Raises ValueError when the archive is closed or while a writing
        handle is open.
        """
        if not self.fp:
            raise ValueError(
                "Attempt to write to ZIP archive that was already closed")
        if self._writing:
            raise ValueError(
                "Can't write to ZIP archive while an open writing handle exists"
            )

        zinfo = ZipInfo.from_file(filename, arcname,
                                  strict_timestamps=self._strict_timestamps)

        if zinfo.is_dir():
            # A directory entry carries no data.
            zinfo.compress_size = 0
            zinfo.CRC = 0
        else:
            # Explicit arguments win over the defaults set on the archive.
            if compress_type is not None:
                zinfo.compress_type = compress_type
            else:
                zinfo.compress_type = self.compression

            if compresslevel is not None:
                zinfo._compresslevel = compresslevel
            else:
                zinfo._compresslevel = self.compresslevel

        if zinfo.is_dir():
            # Directory: write just the header here, under the lock, and
            # register the entry by hand.
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                zinfo.header_offset = self.fp.tell()  # Start of header bytes
                if zinfo.compress_type == ZIP_LZMA:
                    # Compressed data includes an end-of-stream (EOS) marker
                    zinfo.flag_bits |= 0x02

                self._writecheck(zinfo)
                self._didModify = True

                self.filelist.append(zinfo)
                self.NameToInfo[zinfo.filename] = zinfo
                self.fp.write(zinfo.FileHeader(False))
                # The next write position is right after this header.
                self.start_dir = self.fp.tell()
        else:
            # Regular file: stream its contents through a writing handle in
            # 8 KiB chunks.
            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
                shutil.copyfileobj(src, dest, 1024*8)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001757
Bo Baylesce237c72018-01-29 23:54:07 -06001758 def writestr(self, zinfo_or_arcname, data,
1759 compress_type=None, compresslevel=None):
Guido van Rossum85825dc2007-08-27 17:03:28 +00001760 """Write a file into the archive. The contents is 'data', which
1761 may be either a 'str' or a 'bytes' instance; if it is a 'str',
1762 it is encoded as UTF-8 first.
1763 'zinfo_or_arcname' is either a ZipInfo instance or
Just van Rossumb083cb32002-12-12 12:23:32 +00001764 the name of the file in the archive."""
Guido van Rossum85825dc2007-08-27 17:03:28 +00001765 if isinstance(data, str):
1766 data = data.encode("utf-8")
Just van Rossumb083cb32002-12-12 12:23:32 +00001767 if not isinstance(zinfo_or_arcname, ZipInfo):
1768 zinfo = ZipInfo(filename=zinfo_or_arcname,
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001769 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001770 zinfo.compress_type = self.compression
Bo Baylesce237c72018-01-29 23:54:07 -06001771 zinfo._compresslevel = self.compresslevel
Serhiy Storchaka46a34922014-09-23 22:40:23 +03001772 if zinfo.filename[-1] == '/':
1773 zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x
1774 zinfo.external_attr |= 0x10 # MS-DOS directory flag
1775 else:
1776 zinfo.external_attr = 0o600 << 16 # ?rw-------
Just van Rossumb083cb32002-12-12 12:23:32 +00001777 else:
1778 zinfo = zinfo_or_arcname
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001779
1780 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001781 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001782 "Attempt to write to ZIP archive that was already closed")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001783 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001784 raise ValueError(
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001785 "Can't write to ZIP archive while an open writing handle exists."
1786 )
1787
1788 if compress_type is not None:
1789 zinfo.compress_type = compress_type
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001790
Bo Baylesce237c72018-01-29 23:54:07 -06001791 if compresslevel is not None:
1792 zinfo._compresslevel = compresslevel
1793
Guido van Rossum85825dc2007-08-27 17:03:28 +00001794 zinfo.file_size = len(data) # Uncompressed size
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001795 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001796 with self.open(zinfo, mode='w') as dest:
1797 dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001798
    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        # close() returns immediately when self.fp is None, so this does
        # nothing for an archive that was already closed.
        self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001802
1803 def close(self):
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001804 """Close the file, and for mode 'w', 'x' and 'a' write the ending
Fred Drake484d7352000-10-02 21:14:52 +00001805 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001806 if self.fp is None:
1807 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001808
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001809 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001810 raise ValueError("Can't close the ZIP file while there is "
1811 "an open writing handle on it. "
1812 "Close the writing handle before closing the zip.")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001813
Antoine Pitrou17babc52012-11-17 23:50:08 +01001814 try:
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001815 if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001816 with self._lock:
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001817 if self._seekable:
Serhiy Storchakaa14f7d22015-01-26 14:01:27 +02001818 self.fp.seek(self.start_dir)
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001819 self._write_end_record()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001820 finally:
1821 fp = self.fp
1822 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001823 self._fpclose(fp)
1824
    def _write_end_record(self):
        """Write the central directory and the end-of-archive records.

        One central directory entry is written to self.fp per item of
        self.filelist, followed by the ZIP64 end records when a limit is
        exceeded, the end-of-archive record and the archive comment.  The
        stream is flushed at the end.

        Raises LargeZipFile when the ZIP64 records are needed but the
        archive does not allow ZIP64.
        """
        for zinfo in self.filelist:         # write central directory
            # Pack the date and time into the two 16-bit fields of the entry
            # (years relative to 1980, seconds halved).
            dt = zinfo.date_time
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            # Values too large for their fixed-width field are collected in
            # `extra` and replaced by 0xffffffff in the field itself.
            extra = []
            if zinfo.file_size > ZIP64_LIMIT \
               or zinfo.compress_size > ZIP64_LIMIT:
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffff
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            min_version = 0
            if extra:
                # Append a ZIP64 field to the extra's
                # (any existing field with id 1 is stripped first, so the
                # new one replaces it)
                extra_data = _strip_extra(extra_data, (1,))
                extra_data = struct.pack(
                    '<HH' + 'Q'*len(extra),
                    1, 8*len(extra), *extra) + extra_data

                min_version = ZIP64_VERSION

            if zinfo.compress_type == ZIP_BZIP2:
                min_version = max(BZIP2_VERSION, min_version)
            elif zinfo.compress_type == ZIP_LZMA:
                min_version = max(LZMA_VERSION, min_version)

            # Never record a version lower than the features used require.
            extract_version = max(min_version, zinfo.extract_version)
            create_version = max(min_version, zinfo.create_version)
            try:
                filename, flag_bits = zinfo._encodeFilenameFlags()
                centdir = struct.pack(structCentralDir,
                                      stringCentralDir, create_version,
                                      zinfo.create_system, extract_version, zinfo.reserved,
                                      flag_bits, zinfo.compress_type, dostime, dosdate,
                                      zinfo.CRC, compress_size, file_size,
                                      len(filename), len(extra_data), len(zinfo.comment),
                                      0, zinfo.internal_attr, zinfo.external_attr,
                                      header_offset)
            except DeprecationWarning:
                # Dump the values that were being packed to stderr, then let
                # the warning-turned-exception propagate.
                print((structCentralDir, stringCentralDir, create_version,
                       zinfo.create_system, extract_version, zinfo.reserved,
                       zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                       zinfo.CRC, compress_size, file_size,
                       len(zinfo.filename), len(extra_data), len(zinfo.comment),
                       0, zinfo.internal_attr, zinfo.external_attr,
                       header_offset), file=sys.stderr)
                raise
            self.fp.write(centdir)
            self.fp.write(filename)
            self.fp.write(extra_data)
            self.fp.write(zinfo.comment)

        # pos2 is the stream position right after the central directory.
        pos2 = self.fp.tell()
        # Write end-of-zip-archive record
        centDirCount = len(self.filelist)
        centDirSize = pos2 - self.start_dir
        centDirOffset = self.start_dir
        requires_zip64 = None
        if centDirCount > ZIP_FILECOUNT_LIMIT:
            requires_zip64 = "Files count"
        elif centDirOffset > ZIP64_LIMIT:
            requires_zip64 = "Central directory offset"
        elif centDirSize > ZIP64_LIMIT:
            requires_zip64 = "Central directory size"
        if requires_zip64:
            # Need to write the ZIP64 end-of-archive records
            if not self._allowZip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")
            zip64endrec = struct.pack(
                structEndArchive64, stringEndArchive64,
                44, 45, 45, 0, 0, centDirCount, centDirCount,
                centDirSize, centDirOffset)
            self.fp.write(zip64endrec)

            # The locator record carries pos2, the position at which the
            # ZIP64 end record above was written.
            zip64locrec = struct.pack(
                structEndArchive64Locator,
                stringEndArchive64Locator, 0, pos2, 1)
            self.fp.write(zip64locrec)
            # Clamp the values for the classic end record to its field
            # widths; the real values are in the ZIP64 record.
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)

        endrec = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, centDirCount, centDirCount,
                             centDirSize, centDirOffset, len(self._comment))
        self.fp.write(endrec)
        self.fp.write(self._comment)
        self.fp.flush()
1926
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001927 def _fpclose(self, fp):
1928 assert self._fileRefCnt > 0
1929 self._fileRefCnt -= 1
1930 if not self._fileRefCnt and not self._filePassed:
1931 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001932
1933
1934class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001935 """Class to create ZIP archives with Python library files and packages."""
1936
    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Initialize through ZipFile.__init__ and remember `optimize`.

        file, mode, compression and allowZip64 are forwarded unchanged to
        ZipFile.__init__; optimize is only stored on the instance here, as
        self._optimize.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize
1942
Christian Tismer59202e52013-10-21 03:59:23 +02001943 def writepy(self, pathname, basename="", filterfunc=None):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001944 """Add all files from "pathname" to the ZIP archive.
1945
Fred Drake484d7352000-10-02 21:14:52 +00001946 If pathname is a package directory, search the directory and
1947 all package subdirectories recursively for all *.py and enter
1948 the modules into the archive. If pathname is a plain
1949 directory, listdir *.py and enter all modules. Else, pathname
1950 must be a Python *.py file and the module will be put into the
Brett Cannonf299abd2015-04-13 14:21:02 -04001951 archive. Added modules are always module.pyc.
Fred Drake484d7352000-10-02 21:14:52 +00001952 This method will compile the module.py into module.pyc if
1953 necessary.
Christian Tismer59202e52013-10-21 03:59:23 +02001954 If filterfunc(pathname) is given, it is called with every argument.
1955 When it is False, the file or directory is skipped.
Fred Drake484d7352000-10-02 21:14:52 +00001956 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001957 pathname = os.fspath(pathname)
Christian Tismer59202e52013-10-21 03:59:23 +02001958 if filterfunc and not filterfunc(pathname):
1959 if self.debug:
Christian Tismer410d9312013-10-22 04:09:28 +02001960 label = 'path' if os.path.isdir(pathname) else 'file'
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001961 print('%s %r skipped by filterfunc' % (label, pathname))
Christian Tismer59202e52013-10-21 03:59:23 +02001962 return
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001963 dir, name = os.path.split(pathname)
1964 if os.path.isdir(pathname):
1965 initname = os.path.join(pathname, "__init__.py")
1966 if os.path.isfile(initname):
1967 # This is a package directory, add it
1968 if basename:
1969 basename = "%s/%s" % (basename, name)
1970 else:
1971 basename = name
1972 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001973 print("Adding package in", pathname, "as", basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001974 fname, arcname = self._get_codename(initname[0:-3], basename)
1975 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001976 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001977 self.write(fname, arcname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01001978 dirlist = sorted(os.listdir(pathname))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001979 dirlist.remove("__init__.py")
1980 # Add all *.py files and package subdirectories
1981 for filename in dirlist:
1982 path = os.path.join(pathname, filename)
1983 root, ext = os.path.splitext(filename)
1984 if os.path.isdir(path):
1985 if os.path.isfile(os.path.join(path, "__init__.py")):
1986 # This is a package directory, add it
Christian Tismer59202e52013-10-21 03:59:23 +02001987 self.writepy(path, basename,
1988 filterfunc=filterfunc) # Recursive call
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001989 elif ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02001990 if filterfunc and not filterfunc(path):
1991 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001992 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02001993 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001994 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02001995 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001996 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001997 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001998 self.write(fname, arcname)
1999 else:
2000 # This is NOT a package directory, add its files at top level
2001 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002002 print("Adding files from directory", pathname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01002003 for filename in sorted(os.listdir(pathname)):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002004 path = os.path.join(pathname, filename)
2005 root, ext = os.path.splitext(filename)
2006 if ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02002007 if filterfunc and not filterfunc(path):
2008 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03002009 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02002010 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002011 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02002012 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002013 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002014 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002015 self.write(fname, arcname)
2016 else:
2017 if pathname[-3:] != ".py":
Collin Winterce36ad82007-08-30 01:19:48 +00002018 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02002019 'Files added with writepy() must end with ".py"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002020 fname, arcname = self._get_codename(pathname[0:-3], basename)
2021 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002022 print("Adding file", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002023 self.write(fname, arcname)
2024
2025 def _get_codename(self, pathname, basename):
2026 """Return (filename, archivename) for the path.
2027
Fred Drake484d7352000-10-02 21:14:52 +00002028 Given a module name path, return the correct file path and
2029 archive name, compiling if necessary. For example, given
2030 /python/lib/string, return (/python/lib/string.pyc, string).
2031 """
Georg Brandl8334fd92010-12-04 10:26:46 +00002032 def _compile(file, optimize=-1):
2033 import py_compile
2034 if self.debug:
2035 print("Compiling", file)
2036 try:
2037 py_compile.compile(file, doraise=True, optimize=optimize)
Serhiy Storchaka45c43752013-01-29 20:10:28 +02002038 except py_compile.PyCompileError as err:
Georg Brandl8334fd92010-12-04 10:26:46 +00002039 print(err.msg)
2040 return False
2041 return True
2042
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002043 file_py = pathname + ".py"
2044 file_pyc = pathname + ".pyc"
Brett Cannonf299abd2015-04-13 14:21:02 -04002045 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2046 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2047 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
Georg Brandl8334fd92010-12-04 10:26:46 +00002048 if self._optimize == -1:
2049 # legacy mode: use whatever file is present
Brett Cannonf299abd2015-04-13 14:21:02 -04002050 if (os.path.isfile(file_pyc) and
Georg Brandl8334fd92010-12-04 10:26:46 +00002051 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2052 # Use .pyc file.
2053 arcname = fname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002054 elif (os.path.isfile(pycache_opt0) and
2055 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
Georg Brandl8334fd92010-12-04 10:26:46 +00002056 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2057 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002058 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002059 arcname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002060 elif (os.path.isfile(pycache_opt1) and
2061 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2062 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002063 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002064 fname = pycache_opt1
2065 arcname = file_pyc
2066 elif (os.path.isfile(pycache_opt2) and
2067 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2068 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2069 # file name in the archive.
2070 fname = pycache_opt2
2071 arcname = file_pyc
Barry Warsaw28a691b2010-04-17 00:19:56 +00002072 else:
Georg Brandl8334fd92010-12-04 10:26:46 +00002073 # Compile py into PEP 3147 pyc file.
2074 if _compile(file_py):
Brett Cannonf299abd2015-04-13 14:21:02 -04002075 if sys.flags.optimize == 0:
2076 fname = pycache_opt0
2077 elif sys.flags.optimize == 1:
2078 fname = pycache_opt1
2079 else:
2080 fname = pycache_opt2
2081 arcname = file_pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002082 else:
2083 fname = arcname = file_py
2084 else:
2085 # new mode: use given optimization level
2086 if self._optimize == 0:
Brett Cannonf299abd2015-04-13 14:21:02 -04002087 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002088 arcname = file_pyc
2089 else:
Brett Cannonf299abd2015-04-13 14:21:02 -04002090 arcname = file_pyc
2091 if self._optimize == 1:
2092 fname = pycache_opt1
2093 elif self._optimize == 2:
2094 fname = pycache_opt2
2095 else:
2096 msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2097 raise ValueError(msg)
Georg Brandl8334fd92010-12-04 10:26:46 +00002098 if not (os.path.isfile(fname) and
2099 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2100 if not _compile(file_py, optimize=self._optimize):
2101 fname = arcname = file_py
Barry Warsaw28a691b2010-04-17 00:19:56 +00002102 archivename = os.path.split(arcname)[1]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002103 if basename:
2104 archivename = "%s/%s" % (basename, archivename)
2105 return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002106
2107
Miss Islington (bot)c410f382019-08-24 09:03:52 -07002108def _unique_everseen(iterable, key=None):
2109 "List unique elements, preserving order. Remember all elements ever seen."
2110 # unique_everseen('AAAABBBCCDAABBB') --> A B C D
2111 # unique_everseen('ABBCcAD', str.lower) --> A B C D
2112 seen = set()
2113 seen_add = seen.add
2114 if key is None:
2115 for element in itertools.filterfalse(seen.__contains__, iterable):
2116 seen_add(element)
2117 yield element
2118 else:
2119 for element in iterable:
2120 k = key(element)
2121 if k not in seen:
2122 seen_add(k)
2123 yield element
2124
2125
def _parents(path):
    """
    Generate every parent of a posixpath.sep-separated path,
    nearest parent first.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    lineage = _ancestry(path)
    # The first item of the ancestry is the path itself, not a parent.
    return itertools.islice(lineage, 1, None)
2143
2144
2145def _ancestry(path):
2146 """
2147 Given a path with elements separated by
2148 posixpath.sep, generate all elements of that path
2149
2150 >>> list(_ancestry('b/d'))
2151 ['b/d', 'b']
2152 >>> list(_ancestry('/b/d/'))
2153 ['/b/d', '/b']
2154 >>> list(_ancestry('b/d/f/'))
2155 ['b/d/f', 'b/d', 'b']
2156 >>> list(_ancestry('b'))
2157 ['b']
2158 >>> list(_ancestry(''))
2159 []
2160 """
2161 path = path.rstrip(posixpath.sep)
2162 while path and path != posixpath.sep:
2163 yield path
2164 path, tail = posixpath.split(path)
2165
2166
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Only entries that exist count as files:

    >>> c.is_file()
    True
    >>> (b / 'missing.txt').is_file()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """Wrap *root* (a ZipFile, or anything ZipFile() accepts) at *at*.

        *at* is the position inside the archive: "" for the archive
        itself, a name ending in "/" for a directory, anything else for
        a file.
        """
        self.root = root if isinstance(root, ZipFile) else ZipFile(root)
        self.at = at

    @property
    def open(self):
        """A callable that opens this entry via the archive's open()."""
        return functools.partial(self.root.open, self.at)

    @property
    def name(self):
        """The final path component, without any trailing slash."""
        return posixpath.basename(self.at.rstrip("/"))

    def read_text(self, *args, **kwargs):
        """Return the entry decoded as text.

        Positional and keyword arguments are forwarded to
        io.TextIOWrapper (e.g. ``encoding``).
        """
        with self.open() as strm:
            return io.TextIOWrapper(strm, *args, **kwargs).read()

    def read_bytes(self):
        """Return the raw bytes of the entry."""
        with self.open() as strm:
            return strm.read()

    def _is_child(self, path):
        # *path* is a direct child when its parent directory is this entry.
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        # Build a sibling Path object sharing the same archive.
        return Path(self.root, at)

    def is_dir(self):
        """Return True for the archive root and for names ending in "/"."""
        return not self.at or self.at.endswith("/")

    def is_file(self):
        """Return True only if this entry exists and is not a directory.

        As with pathlib, a path that is absent from the archive is not
        a file.
        """
        return not self.is_dir() and self.exists()

    def exists(self):
        """Return True if this name is an archive member or implied directory."""
        return self.at in self._names()

    def iterdir(self):
        """Iterate over the direct children of this directory.

        Raises ValueError when called on something that is not a directory.
        """
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self._names())
        return filter(self._is_child, subs)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, add):
        """Return the Path for *add* below this one.

        If only the directory form of the joined name ("name/") is known
        to the archive, that form is used so the result reports is_dir().
        """
        next = posixpath.join(self.at, add)
        next_dir = posixpath.join(self.at, add, "")
        names = self._names()
        return self._next(next_dir if next not in names and next_dir in names else next)

    __truediv__ = joinpath

    @staticmethod
    def _implied_dirs(names):
        """Yield directories implied by *names* but not listed in them.

        Each result ends with "/" and appears once, in first-seen order.
        """
        # Build the lookup set once: testing against the list itself
        # would make this quadratic in the number of archive members.
        known = set(names)
        return _unique_everseen(
            parent + "/"
            for name in names
            for parent in _parents(name)
            if parent + "/" not in known
        )

    @classmethod
    def _add_implied_dirs(cls, names):
        # Explicit names first, then the directories they imply.
        return names + list(cls._implied_dirs(names))

    @property
    def parent(self):
        """The Path of the containing directory ("" for top-level entries)."""
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)

    def _names(self):
        # All member names in the archive plus any implied directories.
        return self._add_implied_dirs(self.root.namelist())
2311
2312
def main(args=None):
    """Run the zipfile command-line interface.

    *args* is the argument list to parse (argparse falls back to
    sys.argv when it is None).  Exactly one of -l/--list, -e/--extract,
    -c/--create or -t/--test must be given.
    """
    import argparse

    description = 'A simple command-line interface for zipfile module.'
    parser = argparse.ArgumentParser(description=description)
    modes = parser.add_mutually_exclusive_group(required=True)
    modes.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    modes.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    modes.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    modes.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    args = parser.parse_args(args)

    if args.test is not None:
        with ZipFile(args.test, 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")
        return

    if args.list is not None:
        with ZipFile(args.list, 'r') as zf:
            zf.printdir()
        return

    if args.extract is not None:
        archive, target_dir = args.extract
        with ZipFile(archive, 'r') as zf:
            zf.extractall(target_dir)
        return

    if args.create is None:
        return

    # First value is the archive to create, the rest are its sources.
    zip_name, *sources = args.create

    def add_to_zip(zf, path, zippath):
        # Files are stored deflated; directories get their own entry
        # (unless they map to the archive root) and are then walked in
        # sorted order.  Anything else is ignored.
        if os.path.isfile(path):
            zf.write(path, zippath, ZIP_DEFLATED)
            return
        if not os.path.isdir(path):
            return
        if zippath:
            zf.write(path, zippath)
        for entry in sorted(os.listdir(path)):
            add_to_zip(zf,
                       os.path.join(path, entry),
                       os.path.join(zippath, entry))

    with ZipFile(zip_name, 'w') as zf:
        for path in sources:
            # Use the last path component as the archive name, looking
            # one level up when the path ends with a separator.
            zippath = (os.path.basename(path)
                       or os.path.basename(os.path.dirname(path)))
            if zippath in ('', os.curdir, os.pardir):
                zippath = ''
            add_to_zip(zf, path, zippath)


if __name__ == "__main__":
    main()