blob: b0afb9da942b12300151da7e7b18bf5ff55b99ee [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04006import binascii
7import functools
8import importlib.util
Antoine Pitroua32f9a22010-01-27 21:18:57 +00009import io
shireenraoa4e29912019-08-24 11:26:41 -040010import itertools
Barry Warsaw28a691b2010-04-17 00:19:56 +000011import os
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040012import posixpath
Barry Warsaw28a691b2010-04-17 00:19:56 +000013import shutil
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040014import stat
Barry Warsaw28a691b2010-04-17 00:19:56 +000015import struct
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040016import sys
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020017import threading
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040018import time
Guido van Rossum32abe6f2000-03-31 17:30:02 +000019
20try:
Tim Peterse1190062001-01-15 03:34:38 +000021 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000022 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040023except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000025 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000026
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020027try:
28 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040029except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020030 bz2 = None
31
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020032try:
33 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040034except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020035 lzma = None
36
# Names exported by a star-import of this module.
__all__ = [
    "BadZipFile",
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "ZIP_BZIP2",
    "ZIP_LZMA",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
    "LargeZipFile",
]
Skip Montanaro40fc1602001-03-01 04:27:19 +000040
class BadZipFile(Exception):
    """Raised when ZIP data is corrupt or uses an unsupported layout."""


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


# Pre-3.2 compatibility names
BadZipfile = BadZipFile
error = BadZipFile
52
Guido van Rossum32abe6f2000-03-31 17:30:02 +000053
# Numeric limits: 2**31 - 1 for sizes, 2**16 - 1 for entry counts and
# comment length.
ZIP64_LIMIT = 0x7FFFFFFF
ZIP_FILECOUNT_LIMIT = 0xFFFF
ZIP_MAX_COMMENT = 0xFFFF

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

(
    _ECD_SIGNATURE,
    _ECD_DISK_NUMBER,
    _ECD_DISK_START,
    _ECD_ENTRIES_THIS_DISK,
    _ECD_ENTRIES_TOTAL,
    _ECD_SIZE,
    _ECD_OFFSET,
    _ECD_COMMENT_SIZE,
    # These last two indices are not part of the structure as defined in the
    # spec, but they are used internally by this module as a convenience
    _ECD_COMMENT,
    _ECD_LOCATION,
) = range(10)

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(
    _CD_SIGNATURE,
    _CD_CREATE_VERSION,
    _CD_CREATE_SYSTEM,
    _CD_EXTRACT_VERSION,
    _CD_EXTRACT_SYSTEM,
    _CD_FLAG_BITS,
    _CD_COMPRESS_TYPE,
    _CD_TIME,
    _CD_DATE,
    _CD_CRC,
    _CD_COMPRESSED_SIZE,
    _CD_UNCOMPRESSED_SIZE,
    _CD_FILENAME_LENGTH,
    _CD_EXTRA_FIELD_LENGTH,
    _CD_COMMENT_LENGTH,
    _CD_DISK_NUMBER_START,
    _CD_INTERNAL_FILE_ATTRIBUTES,
    _CD_EXTERNAL_FILE_ATTRIBUTES,
    _CD_LOCAL_HEADER_OFFSET,
) = range(19)

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

(
    _FH_SIGNATURE,
    _FH_EXTRACT_VERSION,
    _FH_EXTRACT_SYSTEM,
    _FH_GENERAL_PURPOSE_FLAG_BITS,
    _FH_COMPRESSION_METHOD,
    _FH_LAST_MOD_TIME,
    _FH_LAST_MOD_DATE,
    _FH_CRC,
    _FH_COMPRESSED_SIZE,
    _FH_UNCOMPRESSED_SIZE,
    _FH_FILENAME_LENGTH,
    _FH_EXTRA_FIELD_LENGTH,
) = range(12)

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

(
    _CD64_SIGNATURE,
    _CD64_DIRECTORY_RECSIZE,
    _CD64_CREATE_VERSION,
    _CD64_EXTRACT_VERSION,
    _CD64_DISK_NUMBER,
    _CD64_DISK_NUMBER_START,
    _CD64_NUMBER_ENTRIES_THIS_DISK,
    _CD64_NUMBER_ENTRIES_TOTAL,
    _CD64_DIRECTORY_SIZE,
    _CD64_OFFSET_START_CENTDIR,
) = range(10)

_DD_SIGNATURE = 0x08074b50
166
Serhiy Storchaka9bdb7be2018-09-17 15:36:40 +0300167_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
168
169def _strip_extra(extra, xids):
170 # Remove Extra Fields with specified IDs.
171 unpack = _EXTRA_FIELD_STRUCT.unpack
172 modified = False
173 buffer = []
174 start = i = 0
175 while i + 4 <= len(extra):
176 xid, xlen = unpack(extra[i : i + 4])
177 j = i + 4 + xlen
178 if xid in xids:
179 if i != start:
180 buffer.append(extra[start : i])
181 start = j
182 modified = True
183 i = j
184 if not modified:
185 return extra
186 return b''.join(buffer)
187
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An OSError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        # A record was located, so the file has the correct magic number.
        found = bool(_EndRecData(fp))
    except OSError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000195
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    Returns False when the file cannot be opened or read.
    """
    try:
        if hasattr(filename, "read"):
            # Already an open file-like object: probe it directly.
            return _check_zipfile(fp=filename)
        with open(filename, "rb") as stream:
            return _check_zipfile(stream)
    except OSError:
        return False
211
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    *offset* locates the classic end-of-central-directory record and is
    interpreted relative to the end of the file (all seeks use whence=2).
    The ZIP64 locator is looked for immediately before that position and
    the ZIP64 record immediately before the locator.  *endrec* is modified
    in place and returned; it is returned unchanged when the ZIP64
    structures are absent or truncated.  BadZipFile is raised when the
    locator describes a multi-disk archive.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, _reloff, disks = struct.unpack(
        structEndArchive64Locator, locator)
    if magic != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks <= 1):
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    (magic, _recsize, _create_version, _read_version, disk_num, disk_dir,
     entries_here, entries_total, dirsize, diroffset) = struct.unpack(
        structEndArchive64, record)
    if magic != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    replacements = (
        (_ECD_SIGNATURE, magic),
        (_ECD_DISK_NUMBER, disk_num),
        (_ECD_DISK_START, disk_dir),
        (_ECD_ENTRIES_THIS_DISK, entries_here),
        (_ECD_ENTRIES_TOTAL, entries_total),
        (_ECD_SIZE, dirsize),
        (_ECD_OFFSET, diroffset),
    )
    for index, value in replacements:
        endrec[index] = value
    return endrec
253
254
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: a ZIP file with no archive comment has the "end of central
    # directory" structure as the very last item in the file.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    has_bare_record = (
        len(tail) == sizeEndCentDir
        and tail[0:4] == stringEndArchive
        and tail[-2:] == b"\000\000"
    )
    if has_bare_record:
        # the signature is correct and there's no comment, unpack structure,
        # then append a blank comment and the record start offset
        endrec = [*struct.unpack(structEndArchive, tail),
                  b"", filesize - sizeEndCentDir]

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    search_base = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(search_base, 0)
    window = fpin.read()
    hit = window.rfind(stringEndArchive)
    if hit < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    record_end = hit + sizeEndCentDir
    raw_record = window[hit:record_end]
    if len(raw_record) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, raw_record))
    comment_len = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    endrec.append(window[record_end:record_end + comment_len])
    record_pos = search_base + hit
    endrec.append(record_pos)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, record_pos - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000314
Fred Drake484d7352000-10-02 21:14:52 +0000315
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename is the member name; it is cut at the first null byte and
        has os.sep replaced by "/".  date_time is a
        (year, month, day, hour, min, sec) tuple.

        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED  # Type of compression for the file
        self._compresslevel = None       # Level for the compressor
        self.comment = b""               # Comment for each file
        self.extra = b""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0       # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3       # System which created ZIP archive
        self.create_version = DEFAULT_VERSION   # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION  # Version needed to extract archive
        self.reserved = 0                # Must be zero
        self.flag_bits = 0               # ZIP flag bits
        self.volume = 0                  # Volume number of file header
        self.internal_attr = 0           # Internal attributes
        self.external_attr = 0           # External file attributes
        self.compress_size = 0           # Size of the compressed file
        self.file_size = 0               # Size of the uncompressed file
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file

    def __repr__(self):
        """Return a short description showing only the non-default fields."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The high 16 bits hold the mode bits shown as filemode; the low
        # 16 bits are shown raw.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        zip64 selects whether a ZIP64 extra field is written: True forces
        it, False forbids it, and None (the default) enables it only when
        file_size or compress_size exceeds ZIP64_LIMIT.

        Raises LargeZipFile when a size exceeds ZIP64_LIMIT while zip64 is
        false.  As a side effect, extract_version and create_version are
        raised to the minimum the header requires.
        """
        dt = self.date_time
        # Pack the timestamp into MS-DOS date/time words (2-second precision).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        The name is encoded as ASCII when possible; otherwise it is encoded
        as UTF-8 and bit 0x800 is added to the returned flag bits.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Parse self.extra and apply the ZIP64 field (ID 0x0001), if any.

        Each size/offset attribute currently holding its "see ZIP64 field"
        sentinel is replaced by the next 64-bit value of the ZIP64 field.
        Raises BadZipFile if a field overruns the extra data or the ZIP64
        field is too short.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                data = extra[4:ln+4]
                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF):
                        field = "File size"
                        self.file_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.compress_size == 0xFFFF_FFFF:
                        field = "Compress size"
                        self.compress_size, = unpack('<Q', data[:8])
                        data = data[8:]
                    if self.header_offset == 0xFFFF_FFFF:
                        field = "Header offset"
                        self.header_offset, = unpack('<Q', data[:8])
                except struct.error:
                    raise BadZipFile(f"Corrupt zip64 extra field. "
                                     f"{field} not found.") from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).

        When strict_timestamps is false, a modification year before 1980 is
        replaced by 1980-01-01 00:00:00 and a year after 2107 by
        2107-12-31 23:59:59.
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        # endswith() rather than filename[-1]: an empty member name must
        # report "not a directory" instead of raising IndexError.
        return self.filename.endswith('/')
529
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000530
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300531# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
532# internal keys. We noticed that a direct implementation is faster than
533# relying on binascii.crc32().
Thomas Wouterscf297e42007-02-23 15:07:44 +0000534
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300535_crctable = None
536def _gen_crc(crc):
537 for j in range(8):
538 if crc & 1:
539 crc = (crc >> 1) ^ 0xEDB88320
540 else:
541 crc >>= 1
542 return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000543
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300544# ZIP supports a password-based form of encryption. Even though known
545# plaintext attacks have been found against it, it is still useful
546# to be able to get data out of such a file.
547#
548# Usage:
549# zd = _ZipDecrypter(mypwd)
550# plain_bytes = zd(cypher_bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000551
def _ZipDecrypter(pwd):
    """Return a function that decrypts data encrypted with password *pwd*.

    *pwd* is iterated byte by byte (integers) to initialise three 32-bit
    keys.  The returned callable takes a bytes-like object and returns the
    decrypted bytes; the key state carries over between calls, so
    successive chunks must be passed in order.
    """
    global _crctable
    if _crctable is None:
        # Build the 256-entry table once and share it between decrypters.
        _crctable = [_gen_crc(value) for value in range(256)]
    table = _crctable

    k0, k1, k2 = 0x12345678, 0x23456789, 0x34567890

    def mix(byte):
        """Fold one plaintext byte into the three keys."""
        nonlocal k0, k1, k2
        # One-byte CRC32 primitive applied to key 0.
        k0 = (k0 >> 8) ^ table[(k0 ^ byte) & 0xFF]
        k1 = (k1 + (k0 & 0xFF)) & 0xFFFFFFFF
        k1 = (k1 * 0x08088405 + 1) & 0xFFFFFFFF
        # Same primitive applied to key 2, fed with the top byte of key 1.
        k2 = (k2 >> 8) ^ table[(k2 ^ (k1 >> 24)) & 0xFF]

    for pwd_byte in pwd:
        mix(pwd_byte)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        for byte in data:
            t = k2 | 2
            byte ^= ((t * (t ^ 1)) >> 8) & 0xFF
            mix(byte)
            plain.append(byte)
        return bytes(plain)

    return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000588
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200589
class LZMACompressor:
    """Raw LZMA1 compressor that prefixes its output with a small header.

    The header is emitted once, together with the first output, and holds
    the bytes 9 and 4, the 16-bit little-endian length of the encoded
    filter properties, and the properties themselves.
    """

    def __init__(self):
        # The real compressor is created lazily on first use.
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*, preceded by the header on the first call."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream, preceded by the header if none was sent yet."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.flush()
611
612
class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by a properties header.

    Input is buffered until the 4-byte header, the filter properties it
    announces, and at least one further byte have arrived; only then is the
    underlying raw decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Feed *data* and return whatever could be decompressed so far."""
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            # Bytes 2-3 of the header give the size of the properties blob.
            psize, = struct.unpack('<H', pending[2:4])
            body_start = 4 + psize
            if len(pending) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[body_start:]
            # The buffer is no longer needed once the header is parsed.
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
639
640
# Display names for ZIP compression method numbers (used when formatting
# messages and reprs).
compressor_names = {
    0: 'store',
    1: 'shrink',
    **dict.fromkeys((2, 3, 4, 5), 'reduce'),
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
660
def _check_compression(compression):
    """Validate that *compression* is a usable compression method.

    Returns None for ZIP_STORED and for any method whose backing module was
    imported.  Raises RuntimeError when the required module is missing and
    NotImplementedError for any other method number.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        return
    if compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
        return
    if compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
        return
    raise NotImplementedError("That compression method is not supported")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200678
679
def _get_compressor(compress_type, compresslevel=None):
    """Return a fresh compressor object for *compress_type*.

    *compresslevel* is forwarded to the deflate and bzip2 compressors when
    it is not None.  Returns None for any method not handled here.
    """
    if compress_type == ZIP_DEFLATED:
        level = (zlib.Z_DEFAULT_COMPRESSION if compresslevel is None
                 else compresslevel)
        # Negative wbits: raw deflate stream without zlib header/trailer.
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()
    return None
694
695
def _get_decompressor(compress_type):
    """Return a fresh decompressor object for *compress_type*.

    Returns None for stored members.  The method is validated with
    _check_compression() first, so its exceptions propagate from here.
    """
    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # Negative wbits: raw deflate stream without zlib header/trailer.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    # Fallback for any method not handled above; include the method's
    # name in the message when it is known.
    label = compressor_names.get(compress_type)
    if not label:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError("compression type %d (%s)" % (compress_type, label))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200712
713
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200714class _SharedFile:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300715 def __init__(self, file, pos, close, lock, writing):
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200716 self._file = file
717 self._pos = pos
718 self._close = close
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200719 self._lock = lock
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300720 self._writing = writing
John Jolly066df4f2018-01-30 01:51:35 -0700721 self.seekable = file.seekable
722 self.tell = file.tell
723
724 def seek(self, offset, whence=0):
725 with self._lock:
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200726 if self._writing():
John Jolly066df4f2018-01-30 01:51:35 -0700727 raise ValueError("Can't reposition in the ZIP file while "
728 "there is an open writing handle on it. "
729 "Close the writing handle before trying to read.")
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200730 self._file.seek(offset, whence)
John Jolly066df4f2018-01-30 01:51:35 -0700731 self._pos = self._file.tell()
732 return self._pos
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200733
734 def read(self, n=-1):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200735 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300736 if self._writing():
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300737 raise ValueError("Can't read from the ZIP file while there "
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300738 "is an open writing handle on it. "
739 "Close the writing handle before trying to read.")
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200740 self._file.seek(self._pos)
741 data = self._file.read(n)
742 self._pos = self._file.tell()
743 return data
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200744
745 def close(self):
746 if self._file is not None:
747 fileobj = self._file
748 self._file = None
749 self._close(fileobj)
750
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200751# Provide the tell method for unseekable stream
752class _Tellable:
753 def __init__(self, fp):
754 self.fp = fp
755 self.offset = 0
756
757 def write(self, data):
758 n = self.fp.write(data)
759 self.offset += n
760 return n
761
762 def tell(self):
763 return self.offset
764
765 def flush(self):
766 self.fp.flush()
767
768 def close(self):
769 self.fp.close()
770
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200771
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # Parenthesized on purpose: "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, pwd=None,
                 close_fileobj=False):
        """Prepare to read the member described by *zipinfo* from *fileobj*.

        *fileobj* must be positioned at the start of the member's data.
        If *pwd* is given, the encryption header is read and checked
        immediately; RuntimeError is raised when the password is wrong.
        If *close_fileobj* is true, close() also closes *fileobj*.
        """
        self._fileobj = fileobj
        self._pwd = pwd
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size   # compressed bytes not yet read
        self._left = zipinfo.file_size                # uncompressed bytes not yet produced

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0          # read position inside _readbuffer

        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        self._seekable = False
        try:
            if fileobj.seekable():
                # Remember the initial state so seek() can rewind to it.
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                # NOTE: when zipinfo has no CRC, _running_crc is unset and
                # the AttributeError handler below leaves the object
                # non-seekable.
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass

        self._decrypter = None
        if pwd:
            if zipinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zipinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zipinfo.CRC >> 24) & 0xff
            h = self._init_decrypter()
            if h != check_byte:
                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)

    def _init_decrypter(self):
        """Create the decrypter, consume the header and return its check byte."""
        self._decrypter = _ZipDecrypter(self._pwd)
        # The first 12 bytes in the cypher stream is an encryption header
        #  used to strengthen the algorithm. The first 11 bytes are
        #  completely random, while the 12th contains the MSB of the CRC,
        #  or the MSB of the file time depending on the header type
        #  and is used to check the correctness of the password.
        header = self._fileobj.read(12)
        self._compress_left -= 12
        return self._decrypter(header)[11]

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Put the bytes just read back in front of the buffer.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # The request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)   # bytes still missing after the buffer
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than requested: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n=-1):
        """Read up to n bytes with at most one read() system call.

        If n is omitted, None, or negative, return the buffered data plus
        the first non-empty chunk that can be read.
        """

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read (and decrypt, if needed) raw member bytes from the file:
        # at least MIN_READ_SIZE, but never past the end of the member.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            # The file ended before the member's compressed data did.
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        return self._seekable

    def seek(self, offset, whence=0):
        """Move to a new position in the uncompressed data and return it.

        *whence* is 0 (from start), 1 (from current position) or 2 (from
        end).  The target is clamped to the range [0, file size].  Seeking
        backwards outside the read buffer restarts decompression from the
        beginning of the member; seeking forwards reads and discards data.

        Raises io.UnsupportedOperation when the underlying stream is not
        seekable and ValueError for an invalid *whence*.
        """
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        # Skip forward by reading and discarding in bounded chunks.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        """Return the current position in the uncompressed data.

        Raises io.UnsupportedOperation when the underlying stream is not
        seekable.
        """
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos
1089
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001090
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001091class _ZipWriteFile(io.BufferedIOBase):
1092 def __init__(self, zf, zinfo, zip64):
1093 self._zinfo = zinfo
1094 self._zip64 = zip64
1095 self._zipfile = zf
Bo Baylesce237c72018-01-29 23:54:07 -06001096 self._compressor = _get_compressor(zinfo.compress_type,
1097 zinfo._compresslevel)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001098 self._file_size = 0
1099 self._compress_size = 0
1100 self._crc = 0
1101
1102 @property
1103 def _fileobj(self):
1104 return self._zipfile.fp
1105
1106 def writable(self):
1107 return True
1108
1109 def write(self, data):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001110 if self.closed:
1111 raise ValueError('I/O operation on closed file.')
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001112 nbytes = len(data)
1113 self._file_size += nbytes
1114 self._crc = crc32(data, self._crc)
1115 if self._compressor:
1116 data = self._compressor.compress(data)
1117 self._compress_size += len(data)
1118 self._fileobj.write(data)
1119 return nbytes
1120
1121 def close(self):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001122 if self.closed:
1123 return
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001124 try:
1125 super().close()
1126 # Flush any data from the compressor, and update header info
1127 if self._compressor:
1128 buf = self._compressor.flush()
1129 self._compress_size += len(buf)
1130 self._fileobj.write(buf)
1131 self._zinfo.compress_size = self._compress_size
1132 else:
1133 self._zinfo.compress_size = self._file_size
1134 self._zinfo.CRC = self._crc
1135 self._zinfo.file_size = self._file_size
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001136
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001137 # Write updated header info
1138 if self._zinfo.flag_bits & 0x08:
1139 # Write CRC and file sizes after the file data
1140 fmt = '<LLQQ' if self._zip64 else '<LLLL'
1141 self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
1142 self._zinfo.compress_size, self._zinfo.file_size))
1143 self._zipfile.start_dir = self._fileobj.tell()
1144 else:
1145 if not self._zip64:
1146 if self._file_size > ZIP64_LIMIT:
1147 raise RuntimeError(
1148 'File size unexpectedly exceeded ZIP64 limit')
1149 if self._compress_size > ZIP64_LIMIT:
1150 raise RuntimeError(
1151 'Compressed size unexpectedly exceeded ZIP64 limit')
1152 # Seek backwards and write file header (which will now include
1153 # correct CRC and file sizes)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001154
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001155 # Preserve current position in file
1156 self._zipfile.start_dir = self._fileobj.tell()
1157 self._fileobj.seek(self._zinfo.header_offset)
1158 self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1159 self._fileobj.seek(self._zipfile.start_dir)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001160
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001161 # Successfully written: Add file to our caches
1162 self._zipfile.filelist.append(self._zinfo)
1163 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1164 finally:
1165 self._zipfile._writing = False
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001166
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001167
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001168
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001169class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001170 """ Class with methods to open, read, write, close, list zip files.
1171
Bo Baylesce237c72018-01-29 23:54:07 -06001172 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1173 compresslevel=None)
Tim Petersa19a1682001-03-29 04:36:09 +00001174
Fred Drake3d9091e2001-03-26 15:49:24 +00001175 file: Either the path to the file, or a file-like object.
1176 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001177 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1178 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001179 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1180 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001181 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1182 needed, otherwise it will raise an exception when this would
1183 be necessary.
Bo Baylesce237c72018-01-29 23:54:07 -06001184 compresslevel: None (default for the given compression type) or an integer
1185 specifying the level to pass to the compressor.
1186 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1187 When using ZIP_DEFLATED integers 0 through 9 are accepted.
1188 When using ZIP_BZIP2 integers 1 through 9 are accepted.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001189
Fred Drake3d9091e2001-03-26 15:49:24 +00001190 """
Fred Drake484d7352000-10-02 21:14:52 +00001191
Fred Drake90eac282001-02-28 05:29:34 +00001192 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001193 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001194
Bo Baylesce237c72018-01-29 23:54:07 -06001195 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
Marcel Plch77b112c2018-08-31 16:43:31 +02001196 compresslevel=None, *, strict_timestamps=True):
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001197 """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
1198 or append 'a'."""
1199 if mode not in ('r', 'w', 'x', 'a'):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001200 raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001201
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001202 _check_compression(compression)
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001203
1204 self._allowZip64 = allowZip64
1205 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +00001206 self.debug = 0 # Level of printing: 0 through 3
1207 self.NameToInfo = {} # Find file info given name
1208 self.filelist = [] # List of ZipInfo instances for archive
1209 self.compression = compression # Method of compression
Bo Baylesce237c72018-01-29 23:54:07 -06001210 self.compresslevel = compresslevel
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001211 self.mode = mode
Thomas Wouterscf297e42007-02-23 15:07:44 +00001212 self.pwd = None
R David Murrayf50b38a2012-04-12 18:44:58 -04001213 self._comment = b''
Marcel Plch77b112c2018-08-31 16:43:31 +02001214 self._strict_timestamps = strict_timestamps
Tim Petersa19a1682001-03-29 04:36:09 +00001215
Fred Drake3d9091e2001-03-26 15:49:24 +00001216 # Check if we were passed a file-like object
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001217 if isinstance(file, os.PathLike):
1218 file = os.fspath(file)
Guido van Rossum3172c5d2007-10-16 18:12:55 +00001219 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001220 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +00001221 self._filePassed = 0
1222 self.filename = file
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001223 modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
1224 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001225 filemode = modeDict[mode]
1226 while True:
1227 try:
1228 self.fp = io.open(file, filemode)
1229 except OSError:
1230 if filemode in modeDict:
1231 filemode = modeDict[filemode]
1232 continue
Thomas Wouterscf297e42007-02-23 15:07:44 +00001233 raise
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001234 break
Fred Drake3d9091e2001-03-26 15:49:24 +00001235 else:
1236 self._filePassed = 1
1237 self.fp = file
1238 self.filename = getattr(file, 'name', None)
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001239 self._fileRefCnt = 1
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001240 self._lock = threading.RLock()
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001241 self._seekable = True
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001242 self._writing = False
Tim Petersa19a1682001-03-29 04:36:09 +00001243
Antoine Pitrou17babc52012-11-17 23:50:08 +01001244 try:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001245 if mode == 'r':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001246 self._RealGetContents()
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001247 elif mode in ('w', 'x'):
Georg Brandl268e4d42010-10-14 06:59:45 +00001248 # set the modified flag so central directory gets written
1249 # even if no files are added to the archive
1250 self._didModify = True
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001251 try:
Serhiy Storchaka34cba332017-01-01 19:00:30 +02001252 self.start_dir = self.fp.tell()
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001253 except (AttributeError, OSError):
1254 self.fp = _Tellable(self.fp)
Serhiy Storchaka34cba332017-01-01 19:00:30 +02001255 self.start_dir = 0
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001256 self._seekable = False
1257 else:
1258 # Some file-like objects can provide tell() but not seek()
1259 try:
1260 self.fp.seek(self.start_dir)
1261 except (AttributeError, OSError):
1262 self._seekable = False
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001263 elif mode == 'a':
Antoine Pitrou17babc52012-11-17 23:50:08 +01001264 try:
1265 # See if file is a zip file
1266 self._RealGetContents()
1267 # seek to start of directory and overwrite
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001268 self.fp.seek(self.start_dir)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001269 except BadZipFile:
1270 # file is not a zip file, just append
1271 self.fp.seek(0, 2)
1272
1273 # set the modified flag so central directory gets written
1274 # even if no files are added to the archive
1275 self._didModify = True
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001276 self.start_dir = self.fp.tell()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001277 else:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001278 raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
Antoine Pitrou17babc52012-11-17 23:50:08 +01001279 except:
1280 fp = self.fp
1281 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001282 self._fpclose(fp)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001283 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001284
Ezio Melottifaa6b7f2009-12-30 12:34:59 +00001285 def __enter__(self):
1286 return self
1287
1288 def __exit__(self, type, value, traceback):
1289 self.close()
1290
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001291 def __repr__(self):
1292 result = ['<%s.%s' % (self.__class__.__module__,
1293 self.__class__.__qualname__)]
1294 if self.fp is not None:
1295 if self._filePassed:
1296 result.append(' file=%r' % self.fp)
1297 elif self.filename is not None:
1298 result.append(' filename=%r' % self.filename)
1299 result.append(' mode=%r' % self.mode)
1300 else:
1301 result.append(' [closed]')
1302 result.append('>')
1303 return ''.join(result)
1304
Tim Peters7d3bad62001-04-04 18:56:49 +00001305 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +00001306 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001307 fp = self.fp
Georg Brandl268e4d42010-10-14 06:59:45 +00001308 try:
1309 endrec = _EndRecData(fp)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001310 except OSError:
Georg Brandl4d540882010-10-28 06:42:33 +00001311 raise BadZipFile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001312 if not endrec:
Georg Brandl4d540882010-10-28 06:42:33 +00001313 raise BadZipFile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001314 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001315 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001316 size_cd = endrec[_ECD_SIZE] # bytes in central directory
1317 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
R David Murrayf50b38a2012-04-12 18:44:58 -04001318 self._comment = endrec[_ECD_COMMENT] # archive comment
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001319
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001320 # "concat" is zero, unless zip was concatenated to another file
1321 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +00001322 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1323 # If Zip64 extension structures are present, account for them
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001324 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001325
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001326 if self.debug > 2:
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001327 inferred = concat + offset_cd
1328 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001329 # self.start_dir: Position of start of central directory
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001330 self.start_dir = offset_cd + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001331 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001332 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001333 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001334 total = 0
1335 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001336 centdir = fp.read(sizeCentralDir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001337 if len(centdir) != sizeCentralDir:
1338 raise BadZipFile("Truncated central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001339 centdir = struct.unpack(structCentralDir, centdir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001340 if centdir[_CD_SIGNATURE] != stringCentralDir:
1341 raise BadZipFile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001342 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001343 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +00001344 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001345 flags = centdir[5]
1346 if flags & 0x800:
1347 # UTF-8 file names extension
1348 filename = filename.decode('utf-8')
1349 else:
1350 # Historical ZIP filename encoding
1351 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001352 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001353 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +00001354 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1355 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001356 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001357 (x.create_version, x.create_system, x.extract_version, x.reserved,
Christian Tismer59202e52013-10-21 03:59:23 +02001358 x.flag_bits, x.compress_type, t, d,
1359 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
Martin v. Löwisd099b562012-05-01 14:08:22 +02001360 if x.extract_version > MAX_EXTRACT_VERSION:
1361 raise NotImplementedError("zip file version %.1f" %
1362 (x.extract_version / 10))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001363 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1364 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +00001365 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001366 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Christian Tismer59202e52013-10-21 03:59:23 +02001367 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001368
1369 x._decodeExtra()
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001370 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001371 self.filelist.append(x)
1372 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001373
1374 # update total bytes read from central directory
1375 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1376 + centdir[_CD_EXTRA_FIELD_LENGTH]
1377 + centdir[_CD_COMMENT_LENGTH])
1378
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001379 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001380 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001381
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001382
1383 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001384 """Return a list of file names in the archive."""
Ezio Melotti006917e2012-04-16 21:34:24 -06001385 return [data.filename for data in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001386
1387 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001388 """Return a list of class ZipInfo instances for files in the
1389 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001390 return self.filelist
1391
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001392 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +00001393 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001394 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
1395 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001396 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001397 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001398 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1399 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001400
1401 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001402 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001403 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001404 for zinfo in self.filelist:
1405 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001406 # Read by chunks, to avoid an OverflowError or a
1407 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001408 with self.open(zinfo.filename, "r") as f:
1409 while f.read(chunk_size): # Check CRC-32
1410 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001411 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001412 return zinfo.filename
1413
1414 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +00001415 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001416 info = self.NameToInfo.get(name)
1417 if info is None:
1418 raise KeyError(
1419 'There is no item named %r in the archive' % name)
1420
1421 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001422
Thomas Wouterscf297e42007-02-23 15:07:44 +00001423 def setpassword(self, pwd):
1424 """Set default password for encrypted files."""
R. David Murray8d855d82010-12-21 21:53:37 +00001425 if pwd and not isinstance(pwd, bytes):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001426 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
R. David Murray8d855d82010-12-21 21:53:37 +00001427 if pwd:
1428 self.pwd = pwd
1429 else:
1430 self.pwd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001431
R David Murrayf50b38a2012-04-12 18:44:58 -04001432 @property
1433 def comment(self):
1434 """The comment text associated with the ZIP file."""
1435 return self._comment
1436
1437 @comment.setter
1438 def comment(self, comment):
1439 if not isinstance(comment, bytes):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001440 raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
R David Murrayf50b38a2012-04-12 18:44:58 -04001441 # check for valid comment length
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001442 if len(comment) > ZIP_MAX_COMMENT:
1443 import warnings
1444 warnings.warn('Archive comment is too long; truncating to %d bytes'
1445 % ZIP_MAX_COMMENT, stacklevel=2)
R David Murrayf50b38a2012-04-12 18:44:58 -04001446 comment = comment[:ZIP_MAX_COMMENT]
1447 self._comment = comment
1448 self._didModify = True
1449
Thomas Wouterscf297e42007-02-23 15:07:44 +00001450 def read(self, name, pwd=None):
Serhiy Storchaka4bb186d2018-11-25 09:51:14 +02001451 """Return file bytes for name."""
Benjamin Petersond285bdb2010-10-31 17:57:22 +00001452 with self.open(name, "r", pwd) as fp:
1453 return fp.read()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001454
    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
        """Return file-like object for 'name'.

        name is a string for the file name within the ZIP file, or a ZipInfo
        object.

        mode should be 'r' to read a file already in the ZIP file, or 'w' to
        write to a file newly added to the archive.

        pwd is the password to decrypt files (only used for reading).

        When writing, if the file size is not known in advance but may exceed
        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
        files.  If the size is known in advance, it is best to pass a ZipInfo
        instance for name, with zinfo.file_size set.

        Raises ValueError for an unsupported mode, for a password given
        together with mode 'w', for a closed archive, or when reading while
        a writing handle is open; TypeError for a non-bytes password;
        BadZipFile for a truncated or inconsistent local file header;
        NotImplementedError for flag bits 5 or 6; RuntimeError when the
        member is encrypted and no password is available.
        """
        # Argument validation happens before any archive state is touched.
        if mode not in {"r", "w"}:
            raise ValueError('open() requires mode "r" or "w"')
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd and (mode == "w"):
            raise ValueError("pwd is only supported for reading files")
        if not self.fp:
            raise ValueError(
                "Attempt to use ZIP archive that was already closed")

        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        elif mode == 'w':
            # New member: inherit the archive-wide compression settings.
            zinfo = ZipInfo(name)
            zinfo.compress_type = self.compression
            zinfo._compresslevel = self.compresslevel
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        # Writing is delegated entirely; everything below is the read path.
        if mode == 'w':
            return self._open_to_write(zinfo, force_zip64=force_zip64)

        if self._writing:
            raise ValueError("Can't read from the ZIP file while there "
                    "is an open writing handle on it. "
                    "Close the writing handle before trying to read.")

        # Open for reading:
        # The reference count is bumped before the shared handle is created;
        # self._fpclose (handed to _SharedFile) is what decrements it again.
        self._fileRefCnt += 1
        zef_file = _SharedFile(self.fp, zinfo.header_offset,
                               self._fpclose, self._lock, lambda: self._writing)
        try:
            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipFile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            # The name is kept for the consistency check below; the extra
            # field is read only to advance past it.
            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x20:
                # Zip 2.7: compressed patched data
                raise NotImplementedError("compressed patched data (flag bit 5)")

            if zinfo.flag_bits & 0x40:
                # strong encryption
                raise NotImplementedError("strong encryption (flag bit 6)")

            if zinfo.flag_bits & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            # The local header must name the same file as the directory entry.
            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            if is_encrypted:
                # Fall back to the archive default password (see setpassword).
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %r is encrypted, password "
                                       "required for extraction" % name)
            else:
                # A password supplied for an unencrypted member is ignored.
                pwd = None

            # NOTE(review): the trailing True presumably tells ZipExtFile to
            # close zef_file when it is closed itself -- confirm against
            # ZipExtFile's signature.
            return ZipExtFile(zef_file, mode, zinfo, pwd, True)
        except:
            # Any failure after the handle was created must release it,
            # otherwise the reference taken above would never be dropped.
            zef_file.close()
            raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001552
    def _open_to_write(self, zinfo, force_zip64=False):
        """Write the local file header for 'zinfo' and return a write handle.

        zinfo is modified in place: compress_size, CRC and flag_bits are
        reset, external_attr gets a default if it is falsy, and
        header_offset is set to the current position of self.fp.

        Raises ValueError if force_zip64 is requested on an archive opened
        without allowZip64, or if another writing handle is already open.
        Errors raised by self._writecheck() propagate unchanged.
        """
        if force_zip64 and not self._allowZip64:
            raise ValueError(
                "force_zip64 is True, but allowZip64 was False when opening "
                "the ZIP file."
            )
        if self._writing:
            raise ValueError("Can't write to the ZIP file while there is "
                             "another write handle open on it. "
                             "Close the first handle before opening another.")

        # Size and CRC are overwritten with correct data after processing the file
        zinfo.compress_size = 0
        zinfo.CRC = 0

        zinfo.flag_bits = 0x00
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02
        if not self._seekable:
            # NOTE(review): bit 3 is the ZIP "data descriptor" flag (sizes/CRC
            # follow the data); set because the header cannot be rewritten
            # on a non-seekable stream -- confirm against _ZipWriteFile.
            zinfo.flag_bits |= 0x08

        if not zinfo.external_attr:
            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
                (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

        # The new member starts where the central directory currently begins
        # (self.start_dir) when the stream can be repositioned; otherwise
        # it starts wherever the stream currently is.
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()

        # _writecheck() reads zinfo.header_offset, so it must run after the
        # offset has been recorded and before anything is written.
        self._writecheck(zinfo)
        self._didModify = True

        self.fp.write(zinfo.FileHeader(zip64))

        # Flag checked by open()/write()/writestr()/close() to refuse
        # concurrent use while this handle is live.
        self._writing = True
        return _ZipWriteFile(self, zinfo, zip64)
1593
Christian Heimes790c8232008-01-07 21:14:23 +00001594 def extract(self, member, path=None, pwd=None):
1595 """Extract a member from the archive to the current working directory,
1596 using its full name. Its file information is extracted as accurately
1597 as possible. `member' may be a filename or a ZipInfo object. You can
1598 specify a different directory using `path'.
1599 """
Christian Heimes790c8232008-01-07 21:14:23 +00001600 if path is None:
1601 path = os.getcwd()
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001602 else:
1603 path = os.fspath(path)
Christian Heimes790c8232008-01-07 21:14:23 +00001604
1605 return self._extract_member(member, path, pwd)
1606
1607 def extractall(self, path=None, members=None, pwd=None):
1608 """Extract all members from the archive to the current working
1609 directory. `path' specifies a different directory to extract to.
1610 `members' is optional and must be a subset of the list returned
1611 by namelist().
1612 """
1613 if members is None:
1614 members = self.namelist()
1615
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001616 if path is None:
1617 path = os.getcwd()
1618 else:
1619 path = os.fspath(path)
1620
Christian Heimes790c8232008-01-07 21:14:23 +00001621 for zipinfo in members:
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001622 self._extract_member(zipinfo, path, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001623
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001624 @classmethod
1625 def _sanitize_windows_name(cls, arcname, pathsep):
1626 """Replace bad characters and remove trailing dots from parts."""
1627 table = cls._windows_illegal_name_trans_table
1628 if not table:
1629 illegal = ':<>|"?*'
1630 table = str.maketrans(illegal, '_' * len(illegal))
1631 cls._windows_illegal_name_trans_table = table
1632 arcname = arcname.translate(table)
1633 # remove trailing dots
1634 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1635 # rejoin, removing empty parts.
1636 arcname = pathsep.join(x for x in arcname if x)
1637 return arcname
1638
Christian Heimes790c8232008-01-07 21:14:23 +00001639 def _extract_member(self, member, targetpath, pwd):
1640 """Extract the ZipInfo object 'member' to a physical
1641 file on the path targetpath.
1642 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001643 if not isinstance(member, ZipInfo):
1644 member = self.getinfo(member)
1645
Christian Heimes790c8232008-01-07 21:14:23 +00001646 # build the destination pathname, replacing
1647 # forward slashes to platform specific separators.
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001648 arcname = member.filename.replace('/', os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001649
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001650 if os.path.altsep:
1651 arcname = arcname.replace(os.path.altsep, os.path.sep)
1652 # interpret absolute pathname as relative, remove drive letter or
1653 # UNC path, redundant separators, "." and ".." components.
1654 arcname = os.path.splitdrive(arcname)[1]
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001655 invalid_path_parts = ('', os.path.curdir, os.path.pardir)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001656 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001657 if x not in invalid_path_parts)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001658 if os.path.sep == '\\':
Serhiy Storchakae5e64442013-02-02 19:50:59 +02001659 # filter illegal characters on Windows
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001660 arcname = self._sanitize_windows_name(arcname, os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001661
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001662 targetpath = os.path.join(targetpath, arcname)
Christian Heimes790c8232008-01-07 21:14:23 +00001663 targetpath = os.path.normpath(targetpath)
1664
1665 # Create all upper directories if necessary.
1666 upperdirs = os.path.dirname(targetpath)
1667 if upperdirs and not os.path.exists(upperdirs):
1668 os.makedirs(upperdirs)
1669
Serhiy Storchaka503f9082016-02-08 00:02:25 +02001670 if member.is_dir():
Martin v. Löwis70ccd162009-05-24 19:47:22 +00001671 if not os.path.isdir(targetpath):
1672 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001673 return targetpath
1674
Antoine Pitrou17babc52012-11-17 23:50:08 +01001675 with self.open(member, pwd=pwd) as source, \
1676 open(targetpath, "wb") as target:
1677 shutil.copyfileobj(source, target)
Christian Heimes790c8232008-01-07 21:14:23 +00001678
1679 return targetpath
1680
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001681 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001682 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001683 if zinfo.filename in self.NameToInfo:
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001684 import warnings
1685 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001686 if self.mode not in ('w', 'x', 'a'):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001687 raise ValueError("write() requires mode 'w', 'x', or 'a'")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001688 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001689 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001690 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001691 _check_compression(zinfo.compress_type)
Serhiy Storchakacfbb3942014-09-23 21:34:24 +03001692 if not self._allowZip64:
1693 requires_zip64 = None
1694 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1695 requires_zip64 = "Files count"
1696 elif zinfo.file_size > ZIP64_LIMIT:
1697 requires_zip64 = "Filesize"
1698 elif zinfo.header_offset > ZIP64_LIMIT:
1699 requires_zip64 = "Zipfile size"
1700 if requires_zip64:
1701 raise LargeZipFile(requires_zip64 +
1702 " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001703
Bo Baylesce237c72018-01-29 23:54:07 -06001704 def write(self, filename, arcname=None,
Marcel Plch77b112c2018-08-31 16:43:31 +02001705 compress_type=None, compresslevel=None):
Fred Drake484d7352000-10-02 21:14:52 +00001706 """Put the bytes from filename into the archive under the name
1707 arcname."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001708 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001709 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001710 "Attempt to write to ZIP archive that was already closed")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001711 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001712 raise ValueError(
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001713 "Can't write to ZIP archive while an open writing handle exists"
1714 )
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001715
Marcel Plcha2fe1e52018-08-02 15:04:52 +02001716 zinfo = ZipInfo.from_file(filename, arcname,
Marcel Plch77b112c2018-08-31 16:43:31 +02001717 strict_timestamps=self._strict_timestamps)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001718
Serhiy Storchaka503f9082016-02-08 00:02:25 +02001719 if zinfo.is_dir():
1720 zinfo.compress_size = 0
1721 zinfo.CRC = 0
1722 else:
1723 if compress_type is not None:
1724 zinfo.compress_type = compress_type
1725 else:
1726 zinfo.compress_type = self.compression
1727
Bo Baylesce237c72018-01-29 23:54:07 -06001728 if compresslevel is not None:
1729 zinfo._compresslevel = compresslevel
1730 else:
1731 zinfo._compresslevel = self.compresslevel
1732
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001733 if zinfo.is_dir():
1734 with self._lock:
1735 if self._seekable:
1736 self.fp.seek(self.start_dir)
1737 zinfo.header_offset = self.fp.tell() # Start of header bytes
1738 if zinfo.compress_type == ZIP_LZMA:
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001739 # Compressed data includes an end-of-stream (EOS) marker
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001740 zinfo.flag_bits |= 0x02
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001741
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001742 self._writecheck(zinfo)
1743 self._didModify = True
Martin v. Löwis59e47792009-01-24 14:10:07 +00001744
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001745 self.filelist.append(zinfo)
1746 self.NameToInfo[zinfo.filename] = zinfo
1747 self.fp.write(zinfo.FileHeader(False))
1748 self.start_dir = self.fp.tell()
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001749 else:
1750 with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
1751 shutil.copyfileobj(src, dest, 1024*8)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001752
Bo Baylesce237c72018-01-29 23:54:07 -06001753 def writestr(self, zinfo_or_arcname, data,
1754 compress_type=None, compresslevel=None):
Guido van Rossum85825dc2007-08-27 17:03:28 +00001755 """Write a file into the archive. The contents is 'data', which
1756 may be either a 'str' or a 'bytes' instance; if it is a 'str',
1757 it is encoded as UTF-8 first.
1758 'zinfo_or_arcname' is either a ZipInfo instance or
Just van Rossumb083cb32002-12-12 12:23:32 +00001759 the name of the file in the archive."""
Guido van Rossum85825dc2007-08-27 17:03:28 +00001760 if isinstance(data, str):
1761 data = data.encode("utf-8")
Just van Rossumb083cb32002-12-12 12:23:32 +00001762 if not isinstance(zinfo_or_arcname, ZipInfo):
1763 zinfo = ZipInfo(filename=zinfo_or_arcname,
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001764 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001765 zinfo.compress_type = self.compression
Bo Baylesce237c72018-01-29 23:54:07 -06001766 zinfo._compresslevel = self.compresslevel
Serhiy Storchaka46a34922014-09-23 22:40:23 +03001767 if zinfo.filename[-1] == '/':
1768 zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x
1769 zinfo.external_attr |= 0x10 # MS-DOS directory flag
1770 else:
1771 zinfo.external_attr = 0o600 << 16 # ?rw-------
Just van Rossumb083cb32002-12-12 12:23:32 +00001772 else:
1773 zinfo = zinfo_or_arcname
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001774
1775 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001776 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001777 "Attempt to write to ZIP archive that was already closed")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001778 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001779 raise ValueError(
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001780 "Can't write to ZIP archive while an open writing handle exists."
1781 )
1782
1783 if compress_type is not None:
1784 zinfo.compress_type = compress_type
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001785
Bo Baylesce237c72018-01-29 23:54:07 -06001786 if compresslevel is not None:
1787 zinfo._compresslevel = compresslevel
1788
Guido van Rossum85825dc2007-08-27 17:03:28 +00001789 zinfo.file_size = len(data) # Uncompressed size
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001790 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001791 with self.open(zinfo, mode='w') as dest:
1792 dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001793
1794 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001795 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001796 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001797
1798 def close(self):
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001799 """Close the file, and for mode 'w', 'x' and 'a' write the ending
Fred Drake484d7352000-10-02 21:14:52 +00001800 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001801 if self.fp is None:
1802 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001803
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001804 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001805 raise ValueError("Can't close the ZIP file while there is "
1806 "an open writing handle on it. "
1807 "Close the writing handle before closing the zip.")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001808
Antoine Pitrou17babc52012-11-17 23:50:08 +01001809 try:
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001810 if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001811 with self._lock:
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001812 if self._seekable:
Serhiy Storchakaa14f7d22015-01-26 14:01:27 +02001813 self.fp.seek(self.start_dir)
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001814 self._write_end_record()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001815 finally:
1816 fp = self.fp
1817 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001818 self._fpclose(fp)
1819
    def _write_end_record(self):
        """Write the central directory and the end-of-archive records.

        One central-directory entry is written to self.fp for every item
        in self.filelist, followed by the ZIP64 end records (only when a
        count, offset or size exceeds its limit), the end-of-archive
        record and the archive comment.  self.fp is flushed at the end.
        Writing starts at the current position of self.fp.

        Raises LargeZipFile when ZIP64 records are required but
        self._allowZip64 is false.
        """
        for zinfo in self.filelist:         # write central directory
            dt = zinfo.date_time
            # Pack the timestamp into the two 16-bit DOS fields: years are
            # stored relative to 1980 and seconds in 2-second units.
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            # Values too large for their 32-bit header fields are collected
            # here and the header field is set to the 0xffffffff marker.
            extra = []
            if zinfo.file_size > ZIP64_LIMIT \
               or zinfo.compress_size > ZIP64_LIMIT:
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffff
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            min_version = 0
            if extra:
                # Append a ZIP64 field to the extra's
                # NOTE(review): _strip_extra presumably drops any existing
                # record with header id 1 so it is not duplicated -- confirm.
                extra_data = _strip_extra(extra_data, (1,))
                # Record layout: id 1, payload length, then one 64-bit
                # value per collected field.
                extra_data = struct.pack(
                    '<HH' + 'Q'*len(extra),
                    1, 8*len(extra), *extra) + extra_data

                min_version = ZIP64_VERSION

            if zinfo.compress_type == ZIP_BZIP2:
                min_version = max(BZIP2_VERSION, min_version)
            elif zinfo.compress_type == ZIP_LZMA:
                min_version = max(LZMA_VERSION, min_version)

            # Never advertise a lower version than the features used need.
            extract_version = max(min_version, zinfo.extract_version)
            create_version = max(min_version, zinfo.create_version)
            try:
                filename, flag_bits = zinfo._encodeFilenameFlags()
                centdir = struct.pack(structCentralDir,
                                      stringCentralDir, create_version,
                                      zinfo.create_system, extract_version, zinfo.reserved,
                                      flag_bits, zinfo.compress_type, dostime, dosdate,
                                      zinfo.CRC, compress_size, file_size,
                                      len(filename), len(extra_data), len(zinfo.comment),
                                      0, zinfo.internal_attr, zinfo.external_attr,
                                      header_offset)
            except DeprecationWarning:
                # Diagnostic aid: dump the values that were being packed to
                # stderr, then re-raise.  NOTE(review): presumably only
                # reachable when warnings are turned into errors -- confirm.
                print((structCentralDir, stringCentralDir, create_version,
                       zinfo.create_system, extract_version, zinfo.reserved,
                       zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                       zinfo.CRC, compress_size, file_size,
                       len(zinfo.filename), len(extra_data), len(zinfo.comment),
                       0, zinfo.internal_attr, zinfo.external_attr,
                       header_offset), file=sys.stderr)
                raise
            # Fixed-size header first, then the variable-length fields in
            # the order their lengths were packed above.
            self.fp.write(centdir)
            self.fp.write(filename)
            self.fp.write(extra_data)
            self.fp.write(zinfo.comment)

        # pos2 is the end of the central directory.
        pos2 = self.fp.tell()
        # Write end-of-zip-archive record
        centDirCount = len(self.filelist)
        centDirSize = pos2 - self.start_dir
        centDirOffset = self.start_dir
        requires_zip64 = None
        if centDirCount > ZIP_FILECOUNT_LIMIT:
            requires_zip64 = "Files count"
        elif centDirOffset > ZIP64_LIMIT:
            requires_zip64 = "Central directory offset"
        elif centDirSize > ZIP64_LIMIT:
            requires_zip64 = "Central directory size"
        if requires_zip64:
            # Need to write the ZIP64 end-of-archive records
            if not self._allowZip64:
                raise LargeZipFile(requires_zip64 +
                                   " would require ZIP64 extensions")
            # NOTE(review): the literals 44, 45, 45, 0, 0 are the leading
            # fixed fields of the ZIP64 end record (see the ZIP APPNOTE) --
            # confirm their meaning against structEndArchive64.
            zip64endrec = struct.pack(
                structEndArchive64, stringEndArchive64,
                44, 45, 45, 0, 0, centDirCount, centDirCount,
                centDirSize, centDirOffset)
            self.fp.write(zip64endrec)

            # The locator points at pos2, where the ZIP64 end record was
            # just written.
            zip64locrec = struct.pack(
                structEndArchive64Locator,
                stringEndArchive64Locator, 0, pos2, 1)
            self.fp.write(zip64locrec)
            # Clamp to the widths of the classic end-record fields.
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)

        endrec = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, centDirCount, centDirCount,
                             centDirSize, centDirOffset, len(self._comment))
        self.fp.write(endrec)
        self.fp.write(self._comment)
        self.fp.flush()
1921
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001922 def _fpclose(self, fp):
1923 assert self._fileRefCnt > 0
1924 self._fileRefCnt -= 1
1925 if not self._fileRefCnt and not self._filePassed:
1926 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001927
1928
1929class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001930 """Class to create ZIP archives with Python library files and packages."""
1931
    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember 'optimize'.

        file, mode, compression and allowZip64 are forwarded to
        ZipFile.__init__ by keyword.  optimize is only stored here, in
        self._optimize.
        """
        # NOTE(review): optimize is presumably the bytecode optimization
        # level used when compiling modules -- confirm where self._optimize
        # is read.
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize
1937
Christian Tismer59202e52013-10-21 03:59:23 +02001938 def writepy(self, pathname, basename="", filterfunc=None):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001939 """Add all files from "pathname" to the ZIP archive.
1940
Fred Drake484d7352000-10-02 21:14:52 +00001941 If pathname is a package directory, search the directory and
1942 all package subdirectories recursively for all *.py and enter
1943 the modules into the archive. If pathname is a plain
1944 directory, listdir *.py and enter all modules. Else, pathname
1945 must be a Python *.py file and the module will be put into the
Brett Cannonf299abd2015-04-13 14:21:02 -04001946 archive. Added modules are always module.pyc.
Fred Drake484d7352000-10-02 21:14:52 +00001947 This method will compile the module.py into module.pyc if
1948 necessary.
Christian Tismer59202e52013-10-21 03:59:23 +02001949 If filterfunc(pathname) is given, it is called with every argument.
1950 When it is False, the file or directory is skipped.
Fred Drake484d7352000-10-02 21:14:52 +00001951 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001952 pathname = os.fspath(pathname)
Christian Tismer59202e52013-10-21 03:59:23 +02001953 if filterfunc and not filterfunc(pathname):
1954 if self.debug:
Christian Tismer410d9312013-10-22 04:09:28 +02001955 label = 'path' if os.path.isdir(pathname) else 'file'
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001956 print('%s %r skipped by filterfunc' % (label, pathname))
Christian Tismer59202e52013-10-21 03:59:23 +02001957 return
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001958 dir, name = os.path.split(pathname)
1959 if os.path.isdir(pathname):
1960 initname = os.path.join(pathname, "__init__.py")
1961 if os.path.isfile(initname):
1962 # This is a package directory, add it
1963 if basename:
1964 basename = "%s/%s" % (basename, name)
1965 else:
1966 basename = name
1967 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001968 print("Adding package in", pathname, "as", basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001969 fname, arcname = self._get_codename(initname[0:-3], basename)
1970 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001971 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001972 self.write(fname, arcname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01001973 dirlist = sorted(os.listdir(pathname))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001974 dirlist.remove("__init__.py")
1975 # Add all *.py files and package subdirectories
1976 for filename in dirlist:
1977 path = os.path.join(pathname, filename)
1978 root, ext = os.path.splitext(filename)
1979 if os.path.isdir(path):
1980 if os.path.isfile(os.path.join(path, "__init__.py")):
1981 # This is a package directory, add it
Christian Tismer59202e52013-10-21 03:59:23 +02001982 self.writepy(path, basename,
1983 filterfunc=filterfunc) # Recursive call
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001984 elif ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02001985 if filterfunc and not filterfunc(path):
1986 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001987 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02001988 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001989 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02001990 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001991 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001992 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001993 self.write(fname, arcname)
1994 else:
1995 # This is NOT a package directory, add its files at top level
1996 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001997 print("Adding files from directory", pathname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01001998 for filename in sorted(os.listdir(pathname)):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001999 path = os.path.join(pathname, filename)
2000 root, ext = os.path.splitext(filename)
2001 if ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02002002 if filterfunc and not filterfunc(path):
2003 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03002004 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02002005 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002006 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02002007 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002008 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002009 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002010 self.write(fname, arcname)
2011 else:
2012 if pathname[-3:] != ".py":
Collin Winterce36ad82007-08-30 01:19:48 +00002013 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02002014 'Files added with writepy() must end with ".py"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002015 fname, arcname = self._get_codename(pathname[0:-3], basename)
2016 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002017 print("Adding file", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002018 self.write(fname, arcname)
2019
2020 def _get_codename(self, pathname, basename):
2021 """Return (filename, archivename) for the path.
2022
Fred Drake484d7352000-10-02 21:14:52 +00002023 Given a module name path, return the correct file path and
2024 archive name, compiling if necessary. For example, given
2025 /python/lib/string, return (/python/lib/string.pyc, string).
2026 """
Georg Brandl8334fd92010-12-04 10:26:46 +00002027 def _compile(file, optimize=-1):
2028 import py_compile
2029 if self.debug:
2030 print("Compiling", file)
2031 try:
2032 py_compile.compile(file, doraise=True, optimize=optimize)
Serhiy Storchaka45c43752013-01-29 20:10:28 +02002033 except py_compile.PyCompileError as err:
Georg Brandl8334fd92010-12-04 10:26:46 +00002034 print(err.msg)
2035 return False
2036 return True
2037
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002038 file_py = pathname + ".py"
2039 file_pyc = pathname + ".pyc"
Brett Cannonf299abd2015-04-13 14:21:02 -04002040 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2041 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2042 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
Georg Brandl8334fd92010-12-04 10:26:46 +00002043 if self._optimize == -1:
2044 # legacy mode: use whatever file is present
Brett Cannonf299abd2015-04-13 14:21:02 -04002045 if (os.path.isfile(file_pyc) and
Georg Brandl8334fd92010-12-04 10:26:46 +00002046 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2047 # Use .pyc file.
2048 arcname = fname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002049 elif (os.path.isfile(pycache_opt0) and
2050 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
Georg Brandl8334fd92010-12-04 10:26:46 +00002051 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2052 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002053 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002054 arcname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002055 elif (os.path.isfile(pycache_opt1) and
2056 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2057 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002058 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002059 fname = pycache_opt1
2060 arcname = file_pyc
2061 elif (os.path.isfile(pycache_opt2) and
2062 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2063 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2064 # file name in the archive.
2065 fname = pycache_opt2
2066 arcname = file_pyc
Barry Warsaw28a691b2010-04-17 00:19:56 +00002067 else:
Georg Brandl8334fd92010-12-04 10:26:46 +00002068 # Compile py into PEP 3147 pyc file.
2069 if _compile(file_py):
Brett Cannonf299abd2015-04-13 14:21:02 -04002070 if sys.flags.optimize == 0:
2071 fname = pycache_opt0
2072 elif sys.flags.optimize == 1:
2073 fname = pycache_opt1
2074 else:
2075 fname = pycache_opt2
2076 arcname = file_pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002077 else:
2078 fname = arcname = file_py
2079 else:
2080 # new mode: use given optimization level
2081 if self._optimize == 0:
Brett Cannonf299abd2015-04-13 14:21:02 -04002082 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002083 arcname = file_pyc
2084 else:
Brett Cannonf299abd2015-04-13 14:21:02 -04002085 arcname = file_pyc
2086 if self._optimize == 1:
2087 fname = pycache_opt1
2088 elif self._optimize == 2:
2089 fname = pycache_opt2
2090 else:
2091 msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2092 raise ValueError(msg)
Georg Brandl8334fd92010-12-04 10:26:46 +00002093 if not (os.path.isfile(fname) and
2094 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2095 if not _compile(file_py, optimize=self._optimize):
2096 fname = arcname = file_py
Barry Warsaw28a691b2010-04-17 00:19:56 +00002097 archivename = os.path.split(arcname)[1]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002098 if basename:
2099 archivename = "%s/%s" % (basename, archivename)
2100 return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002101
2102
shireenraoa4e29912019-08-24 11:26:41 -04002103def _unique_everseen(iterable, key=None):
2104 "List unique elements, preserving order. Remember all elements ever seen."
2105 # unique_everseen('AAAABBBCCDAABBB') --> A B C D
2106 # unique_everseen('ABBCcAD', str.lower) --> A B C D
2107 seen = set()
2108 seen_add = seen.add
2109 if key is None:
2110 for element in itertools.filterfalse(seen.__contains__, iterable):
2111 seen_add(element)
2112 yield element
2113 else:
2114 for element in iterable:
2115 k = key(element)
2116 if k not in seen:
2117 seen_add(k)
2118 yield element
2119
2120
def _parents(path):
    """
    Generate every proper ancestor of a posixpath.sep-separated
    path, nearest ancestor first.  The path itself is not included.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    # _ancestry() yields the path itself first; skip that one entry.
    lineage = _ancestry(path)
    return itertools.islice(lineage, 1, None)
2138
2139
2140def _ancestry(path):
2141 """
2142 Given a path with elements separated by
2143 posixpath.sep, generate all elements of that path
2144
2145 >>> list(_ancestry('b/d'))
2146 ['b/d', 'b']
2147 >>> list(_ancestry('/b/d/'))
2148 ['/b/d', '/b']
2149 >>> list(_ancestry('b/d/f/'))
2150 ['b/d/f', 'b/d', 'b']
2151 >>> list(_ancestry('b'))
2152 ['b']
2153 >>> list(_ancestry(''))
2154 []
2155 """
2156 path = path.rstrip(posixpath.sep)
2157 while path and path != posixpath.sep:
2158 yield path
2159 path, tail = posixpath.split(path)
2160
2161
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """Wrap *root* (a ZipFile, or anything ZipFile() accepts) at *at*.

        *at* is the archive-internal name this object refers to; the
        empty string denotes the top of the archive and a trailing "/"
        marks a directory.
        """
        self.root = root if isinstance(root, ZipFile) else ZipFile(root)
        self.at = at

    @property
    def open(self):
        """A callable that opens this entry via the underlying ZipFile.open()."""
        return functools.partial(self.root.open, self.at)

    @property
    def name(self):
        """The final component of the path, without any trailing slash."""
        return posixpath.basename(self.at.rstrip("/"))

    def read_text(self, *args, **kwargs):
        """Return the entry decoded as text; arguments go to io.TextIOWrapper."""
        with self.open() as strm:
            return io.TextIOWrapper(strm, *args, **kwargs).read()

    def read_bytes(self):
        """Return the raw bytes of the entry."""
        with self.open() as strm:
            return strm.read()

    def _is_child(self, path):
        """Return True if *path* is directly contained in this directory."""
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        """Return a Path for *at* sharing this object's ZipFile."""
        return Path(self.root, at)

    def is_dir(self):
        """Return True for the archive root and for names ending in "/"."""
        return not self.at or self.at.endswith("/")

    def is_file(self):
        """Return True if this path is not a directory (see is_dir)."""
        return not self.is_dir()

    def exists(self):
        """Return True if this name is an entry or implied directory."""
        return self.at in self._names()

    def iterdir(self):
        """Iterate over the direct children of this directory.

        Raises ValueError if this path is not a directory.
        """
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self._names())
        return filter(self._is_child, subs)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, add):
        """Return the Path for *add* below this one.

        If only the directory form ("name/") exists in the archive, that
        form is returned so the result is recognised as a directory.
        """
        next = posixpath.join(self.at, add)
        next_dir = posixpath.join(self.at, add, "")
        names = self._names()
        return self._next(next_dir if next not in names and next_dir in names else next)

    __truediv__ = joinpath

    @staticmethod
    def _implied_dirs(names):
        """Generate directory names implied by *names* but absent from it.

        Each result ends with "/" and is produced once, in first-seen order.
        """
        # Build the lookup set once: testing membership against the list
        # for every parent of every entry would be quadratic in the
        # number of archive members.
        known = set(names)
        return _unique_everseen(
            parent + "/"
            for name in names
            for parent in _parents(name)
            if parent + "/" not in known
        )

    @classmethod
    def _add_implied_dirs(cls, names):
        """Return *names* followed by the directories it implies."""
        return names + list(cls._implied_dirs(names))

    @property
    def parent(self):
        """The Path of the containing directory (the root for top-level names)."""
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)

    def _names(self):
        """Return all archive names, including implied directories."""
        return self._add_implied_dirs(self.root.namelist())
2306
2307
def main(args=None):
    """Command-line entry point: list, extract, create or test a zip file.

    *args* is the argument list handed to argparse (None means
    sys.argv[1:]).  Exactly one of -l, -e, -c or -t must be supplied.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for zipfile module.')
    modes = parser.add_mutually_exclusive_group(required=True)
    modes.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    modes.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    modes.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    modes.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    options = parser.parse_args(args)

    if options.test is not None:
        with ZipFile(options.test, 'r') as archive:
            corrupted = archive.testzip()
        if corrupted:
            print("The following enclosed file is corrupted: {!r}".format(corrupted))
        print("Done testing")
        return

    if options.list is not None:
        with ZipFile(options.list, 'r') as archive:
            archive.printdir()
        return

    if options.extract is not None:
        source, target_dir = options.extract
        with ZipFile(source, 'r') as archive:
            archive.extractall(target_dir)
        return

    if options.create is None:
        return

    # First value is the archive to create, the rest are its sources.
    zip_name, *sources = options.create

    def add_tree(archive, path, zippath):
        # Files are stored deflated; directories get their own entry
        # (unless they map to the archive root) and are walked in
        # sorted order.  Anything else is ignored.
        if os.path.isfile(path):
            archive.write(path, zippath, ZIP_DEFLATED)
            return
        if not os.path.isdir(path):
            return
        if zippath:
            archive.write(path, zippath)
        for entry in sorted(os.listdir(path)):
            add_tree(archive,
                     os.path.join(path, entry), os.path.join(zippath, entry))

    with ZipFile(zip_name, 'w') as archive:
        for path in sources:
            zippath = os.path.basename(path)
            if not zippath:
                # Trailing separator: use the last real path component.
                zippath = os.path.basename(os.path.dirname(path))
            if zippath in ('', os.curdir, os.pardir):
                zippath = ''
            add_tree(archive, path, zippath)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002367
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()