blob: 6504e0eee8b5a13c44d6da8ab26e64d2e67af762 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04006import binascii
7import functools
8import importlib.util
Antoine Pitroua32f9a22010-01-27 21:18:57 +00009import io
shireenraoa4e29912019-08-24 11:26:41 -040010import itertools
Barry Warsaw28a691b2010-04-17 00:19:56 +000011import os
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040012import posixpath
Barry Warsaw28a691b2010-04-17 00:19:56 +000013import shutil
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040014import stat
Barry Warsaw28a691b2010-04-17 00:19:56 +000015import struct
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040016import sys
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020017import threading
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040018import time
Guido van Rossum32abe6f2000-03-31 17:30:02 +000019
# Optional compression back-ends.  A back-end that cannot be imported is
# recorded as None rather than aborting the import of this module.
try:
    import zlib  # We may need its compression method
except ImportError:
    zlib = None
    crc32 = binascii.crc32  # fall back to binascii's CRC-32
else:
    crc32 = zlib.crc32

try:
    import bz2  # We may need its compression method
except ImportError:
    bz2 = None

try:
    import lzma  # We may need its compression method
except ImportError:
    lzma = None
36
# Names exported by ``from zipfile import *``.
__all__ = [
    "BadZipFile", "BadZipfile", "error",
    "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
    "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
]
Skip Montanaro40fc1602001-03-01 04:27:19 +000040
class BadZipFile(Exception):
    """Error raised by this module for corrupt or unsupported archives."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +000043
44
class LargeZipFile(Exception):
    """Error raised while writing a zipfile that needs ZIP64 extensions
    when those extensions are disabled.
    """
50
# Pre-3.2 compatibility names: both are plain aliases of BadZipFile.
BadZipfile = BadZipFile
error = BadZipFile
52
Guido van Rossum32abe6f2000-03-31 17:30:02 +000053
# Size and count limits.
ZIP64_LIMIT = 0x7FFFFFFF          # (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 0xFFFF      # (1 << 16) - 1
ZIP_MAX_COMMENT = 0xFFFF          # (1 << 16) - 1

# constants for Zip file compression methods
# (other ZIP compression methods are not supported)
ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2, ZIP_LZMA = 0, 8, 12, 14

# Version numbers written into / compared against archive headers.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020071
Martin v. Löwisb09b8442008-07-03 14:13:42 +000072# Below are some formats and associated data for reading/writing headers using
73# the struct module. The names and structures of headers/records are those used
74# in the PKWARE description of the ZIP file format:
75# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
76# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000077
Martin v. Löwisb09b8442008-07-03 14:13:42 +000078# The "end of central directory" structure, magic number, size, and indices
79# (section V.I in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +000080structEndArchive = b"<4s4H2LH"
81stringEndArchive = b"PK\005\006"
82sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +000083
84_ECD_SIGNATURE = 0
85_ECD_DISK_NUMBER = 1
86_ECD_DISK_START = 2
87_ECD_ENTRIES_THIS_DISK = 3
88_ECD_ENTRIES_TOTAL = 4
89_ECD_SIZE = 5
90_ECD_OFFSET = 6
91_ECD_COMMENT_SIZE = 7
92# These last two indices are not part of the structure as defined in the
93# spec, but they are used internally by this module as a convenience
94_ECD_COMMENT = 8
95_ECD_LOCATION = 9
96
97# The "central directory" structure, magic number, size, and indices
98# of entries in the structure (section V.F in the format document)
99structCentralDir = "<4s4B4HL2L5H2L"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000100stringCentralDir = b"PK\001\002"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000101sizeCentralDir = struct.calcsize(structCentralDir)
102
Fred Drake3e038e52001-02-28 17:56:26 +0000103# indexes of entries in the central directory structure
104_CD_SIGNATURE = 0
105_CD_CREATE_VERSION = 1
106_CD_CREATE_SYSTEM = 2
107_CD_EXTRACT_VERSION = 3
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000108_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +0000109_CD_FLAG_BITS = 5
110_CD_COMPRESS_TYPE = 6
111_CD_TIME = 7
112_CD_DATE = 8
113_CD_CRC = 9
114_CD_COMPRESSED_SIZE = 10
115_CD_UNCOMPRESSED_SIZE = 11
116_CD_FILENAME_LENGTH = 12
117_CD_EXTRA_FIELD_LENGTH = 13
118_CD_COMMENT_LENGTH = 14
119_CD_DISK_NUMBER_START = 15
120_CD_INTERNAL_FILE_ATTRIBUTES = 16
121_CD_EXTERNAL_FILE_ATTRIBUTES = 17
122_CD_LOCAL_HEADER_OFFSET = 18
123
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000124# The "local file header" structure, magic number, size, and indices
125# (section V.A in the format document)
126structFileHeader = "<4s2B4HL2L2H"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000127stringFileHeader = b"PK\003\004"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000128sizeFileHeader = struct.calcsize(structFileHeader)
129
Fred Drake3e038e52001-02-28 17:56:26 +0000130_FH_SIGNATURE = 0
131_FH_EXTRACT_VERSION = 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000132_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000133_FH_GENERAL_PURPOSE_FLAG_BITS = 3
134_FH_COMPRESSION_METHOD = 4
135_FH_LAST_MOD_TIME = 5
136_FH_LAST_MOD_DATE = 6
137_FH_CRC = 7
138_FH_COMPRESSED_SIZE = 8
139_FH_UNCOMPRESSED_SIZE = 9
140_FH_FILENAME_LENGTH = 10
141_FH_EXTRA_FIELD_LENGTH = 11
142
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000143# The "Zip64 end of central directory locator" structure, magic number, and size
Georg Brandl2ee470f2008-07-16 12:55:28 +0000144structEndArchive64Locator = "<4sLQL"
145stringEndArchive64Locator = b"PK\x06\x07"
146sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000147
148# The "Zip64 end of central directory" record, magic number, size, and indices
149# (section V.G in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000150structEndArchive64 = "<4sQ2H2L4Q"
151stringEndArchive64 = b"PK\x06\x06"
152sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000153
154_CD64_SIGNATURE = 0
155_CD64_DIRECTORY_RECSIZE = 1
156_CD64_CREATE_VERSION = 2
157_CD64_EXTRACT_VERSION = 3
158_CD64_DISK_NUMBER = 4
159_CD64_DISK_NUMBER_START = 5
160_CD64_NUMBER_ENTRIES_THIS_DISK = 6
161_CD64_NUMBER_ENTRIES_TOTAL = 7
162_CD64_DIRECTORY_SIZE = 8
163_CD64_OFFSET_START_CENTDIR = 9
164
Silas Sewell4ba3b502018-09-18 13:00:05 -0400165_DD_SIGNATURE = 0x08074b50
166
Serhiy Storchaka9bdb7be2018-09-17 15:36:40 +0300167_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
168
169def _strip_extra(extra, xids):
170 # Remove Extra Fields with specified IDs.
171 unpack = _EXTRA_FIELD_STRUCT.unpack
172 modified = False
173 buffer = []
174 start = i = 0
175 while i + 4 <= len(extra):
176 xid, xlen = unpack(extra[i : i + 4])
177 j = i + 4 + xlen
178 if xid in xids:
179 if i != start:
180 buffer.append(extra[start : i])
181 start = j
182 modified = True
183 i = j
184 if not modified:
185 return extra
186 return b''.join(buffer)
187
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An OSError raised while probing the file is treated as "not a ZIP
    file" and yields False.
    """
    try:
        found = bool(_EndRecData(fp))   # file has correct magic number
    except OSError:
        return False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000195
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    Returns False when the file cannot be opened or read.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and make sure it is closed again.
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except OSError:
        # Deliberate best effort: an unreadable file is simply "not a ZIP".
        pass
    return verdict
211
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    *offset* is the position of the regular end-of-central-directory record
    relative to the end of the file.  *endrec* is returned unchanged when no
    valid ZIP64 locator/record pair precedes it; otherwise its first seven
    items are overwritten in place with the ZIP64 values.  Raises BadZipFile
    for archives that span multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    raw = fpin.read(sizeEndCentDir64Locator)
    if len(raw) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, raw)
    if magic != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    raw = fpin.read(sizeEndCentDir64)
    if len(raw) != sizeEndCentDir64:
        return endrec
    record = struct.unpack(structEndArchive64, raw)
    if record[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = record[_CD64_SIGNATURE]
    endrec[_ECD_DISK_NUMBER] = record[_CD64_DISK_NUMBER]
    endrec[_ECD_DISK_START] = record[_CD64_DISK_NUMBER_START]
    endrec[_ECD_ENTRIES_THIS_DISK] = record[_CD64_NUMBER_ENTRIES_THIS_DISK]
    endrec[_ECD_ENTRIES_TOTAL] = record[_CD64_NUMBER_ENTRIES_TOTAL]
    endrec[_ECD_SIZE] = record[_CD64_DIRECTORY_SIZE]
    endrec[_ECD_OFFSET] = record[_CD64_OFFSET_START_CENTDIR]
    return endrec
253
254
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: an archive without comment ends with the "end of central
    # directory" structure, so look at exactly that many trailing bytes.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    if (len(tail) == sizeEndCentDir
            and tail[0:4] == stringEndArchive
            and tail[-2:] == b"\000\000"):
        # the signature is correct and there's no comment: unpack the
        # structure, then append a blank comment and the record offset
        endrec = list(struct.unpack(structEndArchive, tail))
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may
    # be up to 64K long.  It is assumed that the "end of central directory"
    # magic number does not appear in the comment.
    search_from = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(search_from, 0)
    tail = fpin.read()
    hit = tail.rfind(stringEndArchive)
    if hit < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    record = tail[hit:hit + sizeEndCentDir]
    if len(record) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, record))
    comment_len = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    comment_at = hit + sizeEndCentDir
    endrec.append(tail[comment_at:comment_at + comment_len])
    endrec.append(search_from + hit)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, search_from + hit - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000314
Fred Drake484d7352000-10-02 21:14:52 +0000315
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named *filename* stamped with *date_time*.

        *date_time* is a (year, month, day, hour, min, sec) sequence.
        Raises ValueError when the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        self.compress_size = 0          # Size of the compressed file
        self.file_size = 0              # Size of the uncompressed file
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file

    def __repr__(self):
        """Return a summary showing only the attributes that are informative."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The high 16 bits are shown as a file mode, the low 16 bits raw.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        When *zip64* is None it is enabled automatically if either size
        exceeds ZIP64_LIMIT.  Raises LargeZipFile when a size exceeds the
        limit but *zip64* is false.  As a side effect, extract_version and
        create_version are raised to the minimum the entry requires.
        """
        dt = self.date_time
        # Pack the timestamp into the two 16-bit date/time header fields
        # (seconds are stored with two-second resolution).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # Append a ZIP64 extra field (ID 1) carrying the 64-bit sizes.
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        The name is encoded as ASCII when possible; otherwise as UTF-8 with
        flag bit 0x800 set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 extra field (ID 0x0001), if any, to this entry.

        file_size, compress_size and header_offset are replaced by the
        64-bit values from the field when they hold the placeholder value.
        Raises BadZipFile when an extra field is truncated or the ZIP64
        field lacks a value that is needed.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                # The field only carries the values whose header slot holds
                # the placeholder, in this fixed order.
                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. File size not found."
                        )
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. Compress size not found."
                        )
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. Header offset not found."
                        )
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).

        When strict_timestamps is false, modification times before 1980 or
        after 2107 are clamped to those bounds.
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        # endswith() rather than indexing, so that an empty filename is
        # reported as "not a directory" instead of raising IndexError.
        return self.filename.endswith('/')
550
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000551
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300552# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
553# internal keys. We noticed that a direct implementation is faster than
554# relying on binascii.crc32().
Thomas Wouterscf297e42007-02-23 15:07:44 +0000555
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300556_crctable = None
557def _gen_crc(crc):
558 for j in range(8):
559 if crc & 1:
560 crc = (crc >> 1) ^ 0xEDB88320
561 else:
562 crc >>= 1
563 return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000564
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300565# ZIP supports a password-based form of encryption. Even though known
566# plaintext attacks have been found against it, it is still useful
567# to be able to get data out of such a file.
568#
569# Usage:
570# zd = _ZipDecrypter(mypwd)
571# plain_bytes = zd(cypher_bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000572
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300573def _ZipDecrypter(pwd):
574 key0 = 305419896
575 key1 = 591751049
576 key2 = 878082192
Thomas Wouterscf297e42007-02-23 15:07:44 +0000577
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300578 global _crctable
579 if _crctable is None:
580 _crctable = list(map(_gen_crc, range(256)))
581 crctable = _crctable
Thomas Wouterscf297e42007-02-23 15:07:44 +0000582
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300583 def crc32(ch, crc):
Thomas Wouterscf297e42007-02-23 15:07:44 +0000584 """Compute the CRC32 primitive on one byte."""
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300585 return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000586
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300587 def update_keys(c):
588 nonlocal key0, key1, key2
589 key0 = crc32(c, key0)
590 key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
591 key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
592 key2 = crc32(key1 >> 24, key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000593
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300594 for p in pwd:
595 update_keys(p)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000596
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300597 def decrypter(data):
598 """Decrypt a bytes object."""
599 result = bytearray()
600 append = result.append
601 for c in data:
602 k = key2 | 2
603 c ^= ((k * (k^1)) >> 8) & 0xFF
604 update_keys(c)
605 append(c)
606 return bytes(result)
607
608 return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000609
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200610
class LZMACompressor:
    """Raw LZMA1 compressor that prefixes its output with a small header.

    The header is ``struct.pack('<BBH', 9, 4, len(props))`` followed by the
    encoded filter properties; it is emitted once, by the first call to
    compress() or flush().
    """

    def __init__(self):
        # The underlying compressor is created lazily together with the header.
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*, returning whatever output is available."""
        if self._comp is not None:
            return self._comp.compress(data)
        header = self._init()
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream and return the remaining output."""
        if self._comp is not None:
            return self._comp.flush()
        header = self._init()
        return header + self._comp.flush()
632
633
class LZMADecompressor:
    """Decompressor for a raw LZMA1 stream preceded by a properties header.

    Input is buffered until the 4-byte header (whose last two bytes give the
    length of the filter properties), the properties themselves and at
    least one byte of payload have arrived; only then is the underlying
    decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Feed *data* and return the decompressed bytes available so far."""
        if self._decomp is None:
            pending = self._unconsumed = self._unconsumed + data
            if len(pending) <= 4:
                return b''
            (props_len,) = struct.unpack('<H', pending[2:4])
            payload_at = 4 + props_len
            if len(pending) <= payload_at:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:payload_at])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[payload_at:]
            # The buffer is no longer needed once the header is consumed.
            del self._unconsumed

        chunk = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return chunk
660
661
# Maps ZIP compression method numbers to short human-readable names.
# Looked up when reporting an unsupported compression type and when
# building ZipExtFile.__repr__.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
681
def _check_compression(compression):
    """Validate that *compression* is a usable compression method.

    Raises RuntimeError when the method is known but the module that
    implements it failed to import, and NotImplementedError for any
    other method number.  Returns None on success.
    """
    if compression == ZIP_STORED:
        # Storing needs no helper module.
        return
    if compression == ZIP_DEFLATED:
        if zlib:
            return
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if compression == ZIP_BZIP2:
        if bz2:
            return
        raise RuntimeError(
            "Compression requires the (missing) bz2 module")
    if compression == ZIP_LZMA:
        if lzma:
            return
        raise RuntimeError(
            "Compression requires the (missing) lzma module")
    raise NotImplementedError("That compression method is not supported")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200699
700
def _get_compressor(compress_type, compresslevel=None):
    """Return a new compressor object for *compress_type*, or None.

    *compresslevel* is forwarded to the deflate and bzip2 compressors when
    it is not None; it is ignored for ZIP_LZMA.  None is returned for any
    other compression type.
    """
    if compress_type == ZIP_DEFLATED:
        if compresslevel is None:
            level = zlib.Z_DEFAULT_COMPRESSION
        else:
            level = compresslevel
        # Negative window bits: raw deflate stream without zlib framing.
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()
    return None
715
716
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    Returns None for ZIP_STORED.  Raises RuntimeError (via
    _check_compression) when the type is supported but its module is
    missing, and NotImplementedError naming the compression type when it
    is not supported at all.
    """
    # The availability check is done per supported branch rather than up
    # front: _check_compression() raises a generic NotImplementedError for
    # every unknown type, which would make the descriptive error at the
    # bottom of this function unreachable.
    if compress_type == ZIP_STORED:
        return None
    elif compress_type == ZIP_DEFLATED:
        _check_compression(compress_type)
        # Negative window bits: raw deflate stream without zlib framing.
        return zlib.decompressobj(-15)
    elif compress_type == ZIP_BZIP2:
        _check_compression(compress_type)
        return bz2.BZ2Decompressor()
    elif compress_type == ZIP_LZMA:
        _check_compression(compress_type)
        return LZMADecompressor()
    else:
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
        else:
            raise NotImplementedError("compression type %d" % (compress_type,))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200733
734
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200735class _SharedFile:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300736 def __init__(self, file, pos, close, lock, writing):
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200737 self._file = file
738 self._pos = pos
739 self._close = close
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200740 self._lock = lock
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300741 self._writing = writing
John Jolly066df4f2018-01-30 01:51:35 -0700742 self.seekable = file.seekable
743 self.tell = file.tell
744
745 def seek(self, offset, whence=0):
746 with self._lock:
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200747 if self._writing():
John Jolly066df4f2018-01-30 01:51:35 -0700748 raise ValueError("Can't reposition in the ZIP file while "
749 "there is an open writing handle on it. "
750 "Close the writing handle before trying to read.")
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200751 self._file.seek(offset, whence)
John Jolly066df4f2018-01-30 01:51:35 -0700752 self._pos = self._file.tell()
753 return self._pos
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200754
755 def read(self, n=-1):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200756 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300757 if self._writing():
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300758 raise ValueError("Can't read from the ZIP file while there "
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300759 "is an open writing handle on it. "
760 "Close the writing handle before trying to read.")
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200761 self._file.seek(self._pos)
762 data = self._file.read(n)
763 self._pos = self._file.tell()
764 return data
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200765
766 def close(self):
767 if self._file is not None:
768 fileobj = self._file
769 self._file = None
770 self._close(fileobj)
771
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200772# Provide the tell method for unseekable stream
773class _Tellable:
774 def __init__(self, fp):
775 self.fp = fp
776 self.offset = 0
777
778 def write(self, data):
779 n = self.fp.write(data)
780 self.offset += n
781 return n
782
783 def tell(self):
784 return self.offset
785
786 def flush(self):
787 self.fp.flush()
788
789 def close(self):
790 self.fp.close()
791
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200792
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().

       Reading, peeking, seeking and telling on a closed object raise
       ValueError.
    """

    # Max size supported by decompressor.
    # NOTE: subtraction binds tighter than the shift, so this evaluates to
    # 1 << 30.  The value is kept as is.
    MAX_N = 1 << 31 - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, pwd=None,
                 close_fileobj=False):
        """Prepare to read the member described by *zipinfo* from *fileobj*.

        fileobj:       file object positioned at the start of the member's
                       (possibly encrypted) data.
        mode:          stored as the ``mode`` attribute.
        zipinfo:       provides compress_type, compress_size, file_size,
                       filename and, if present, CRC.
        pwd:           password; when true, the encryption header is read
                       and checked.
        close_fileobj: whether close() also closes *fileobj*.

        Raises RuntimeError if *pwd* does not match the check byte.
        """
        self._fileobj = fileobj
        self._pwd = pwd
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        self._seekable = False
        try:
            if fileobj.seekable():
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                # _running_crc only exists when zipinfo has a CRC; without
                # it this raises AttributeError and the object stays
                # non-seekable.
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass

        self._decrypter = None
        if pwd:
            if zipinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zipinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zipinfo.CRC >> 24) & 0xff
            h = self._init_decrypter()
            if h != check_byte:
                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)

    def _check_open(self):
        # Same message as _ZipWriteFile.write() uses for a closed file.
        if self.closed:
            raise ValueError('I/O operation on closed file.')

    def _init_decrypter(self):
        self._decrypter = _ZipDecrypter(self._pwd)
        # The first 12 bytes in the cypher stream is an encryption header
        #  used to strengthen the algorithm. The first 11 bytes are
        #  completely random, while the 12th contains the MSB of the CRC,
        #  or the MSB of the file time depending on the header type
        #  and is used to check the correctness of the password.
        header = self._fileobj.read(12)
        self._compress_left -= 12
        return self._decrypter(header)[11]

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """
        self._check_open()

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        self._check_open()
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        self._check_open()
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # More was produced than requested: keep the surplus
                # buffered for the next call.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n=-1):
        """Read up to n bytes with at most one read() system call.

        If n is omitted, None, or negative, return the buffered bytes plus
        the first non-empty chunk that can be produced.
        """
        self._check_open()

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read (and decrypt) the next chunk of raw member data.  Raises
        # EOFError if the underlying file ends before the member does.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        return self._seekable

    def seek(self, offset, whence=0):
        """Move to a new position in the uncompressed data and return it.

        The target is clamped to the range [0, file size].  Seeking
        backwards beyond the buffered data restarts decompression from the
        beginning of the member.

        Raises ValueError if the file is closed or *whence* is invalid, and
        io.UnsupportedOperation if the underlying stream is not seekable.
        """
        self._check_open()
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        # Skip forward by reading and discarding data in bounded chunks.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        """Return the current position in the uncompressed data.

        Raises ValueError if the file is closed and io.UnsupportedOperation
        if the underlying stream is not seekable.
        """
        self._check_open()
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos
1110
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001111
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001112class _ZipWriteFile(io.BufferedIOBase):
1113 def __init__(self, zf, zinfo, zip64):
1114 self._zinfo = zinfo
1115 self._zip64 = zip64
1116 self._zipfile = zf
Bo Baylesce237c72018-01-29 23:54:07 -06001117 self._compressor = _get_compressor(zinfo.compress_type,
1118 zinfo._compresslevel)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001119 self._file_size = 0
1120 self._compress_size = 0
1121 self._crc = 0
1122
1123 @property
1124 def _fileobj(self):
1125 return self._zipfile.fp
1126
1127 def writable(self):
1128 return True
1129
1130 def write(self, data):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001131 if self.closed:
1132 raise ValueError('I/O operation on closed file.')
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001133 nbytes = len(data)
1134 self._file_size += nbytes
1135 self._crc = crc32(data, self._crc)
1136 if self._compressor:
1137 data = self._compressor.compress(data)
1138 self._compress_size += len(data)
1139 self._fileobj.write(data)
1140 return nbytes
1141
1142 def close(self):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001143 if self.closed:
1144 return
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001145 try:
1146 super().close()
1147 # Flush any data from the compressor, and update header info
1148 if self._compressor:
1149 buf = self._compressor.flush()
1150 self._compress_size += len(buf)
1151 self._fileobj.write(buf)
1152 self._zinfo.compress_size = self._compress_size
1153 else:
1154 self._zinfo.compress_size = self._file_size
1155 self._zinfo.CRC = self._crc
1156 self._zinfo.file_size = self._file_size
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001157
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001158 # Write updated header info
1159 if self._zinfo.flag_bits & 0x08:
1160 # Write CRC and file sizes after the file data
1161 fmt = '<LLQQ' if self._zip64 else '<LLLL'
1162 self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
1163 self._zinfo.compress_size, self._zinfo.file_size))
1164 self._zipfile.start_dir = self._fileobj.tell()
1165 else:
1166 if not self._zip64:
1167 if self._file_size > ZIP64_LIMIT:
1168 raise RuntimeError(
1169 'File size unexpectedly exceeded ZIP64 limit')
1170 if self._compress_size > ZIP64_LIMIT:
1171 raise RuntimeError(
1172 'Compressed size unexpectedly exceeded ZIP64 limit')
1173 # Seek backwards and write file header (which will now include
1174 # correct CRC and file sizes)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001175
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001176 # Preserve current position in file
1177 self._zipfile.start_dir = self._fileobj.tell()
1178 self._fileobj.seek(self._zinfo.header_offset)
1179 self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1180 self._fileobj.seek(self._zipfile.start_dir)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001181
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001182 # Successfully written: Add file to our caches
1183 self._zipfile.filelist.append(self._zinfo)
1184 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1185 finally:
1186 self._zipfile._writing = False
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001187
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001188
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001189
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001190class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001191 """ Class with methods to open, read, write, close, list zip files.
1192
Bo Baylesce237c72018-01-29 23:54:07 -06001193 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1194 compresslevel=None)
Tim Petersa19a1682001-03-29 04:36:09 +00001195
Fred Drake3d9091e2001-03-26 15:49:24 +00001196 file: Either the path to the file, or a file-like object.
1197 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001198 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1199 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001200 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1201 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001202 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1203 needed, otherwise it will raise an exception when this would
1204 be necessary.
Bo Baylesce237c72018-01-29 23:54:07 -06001205 compresslevel: None (default for the given compression type) or an integer
1206 specifying the level to pass to the compressor.
1207 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1208 When using ZIP_DEFLATED integers 0 through 9 are accepted.
1209 When using ZIP_BZIP2 integers 1 through 9 are accepted.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001210
Fred Drake3d9091e2001-03-26 15:49:24 +00001211 """
Fred Drake484d7352000-10-02 21:14:52 +00001212
Fred Drake90eac282001-02-28 05:29:34 +00001213 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001214 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001215
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
             compresslevel=None, *, strict_timestamps=True):
    """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
    or append 'a'.

    *file* is either a path (str or os.PathLike), which is opened here, or
    an already open file-like object, which is used as is.  *compression*
    is validated with _check_compression(); the remaining arguments are
    stored on the instance.

    Raises ValueError for an unknown *mode*.  If setting up the archive
    fails after the file object is in place, the file is released through
    _fpclose() and the exception is re-raised.
    """
    if mode not in ('r', 'w', 'x', 'a'):
        raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

    _check_compression(compression)

    self.mode = mode
    self.compression = compression  # Method of compression
    self.compresslevel = compresslevel
    self.pwd = None
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self._allowZip64 = allowZip64
    self._strict_timestamps = strict_timestamps
    self._didModify = False
    self._comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, os.PathLike):
        file = os.fspath(file)
    if not isinstance(file, str):
        # Yes: use the caller's file object directly.
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    else:
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        # Maps each open mode to the one to try next if opening fails;
        # a mode with no entry is the last attempt.
        fallback_modes = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                          'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
        open_mode = fallback_modes[mode]
        while True:
            try:
                self.fp = io.open(file, open_mode)
                break
            except OSError:
                if open_mode not in fallback_modes:
                    raise
                open_mode = fallback_modes[open_mode]
    self._fileRefCnt = 1
    self._lock = threading.RLock()
    self._seekable = True
    self._writing = False

    try:
        if mode == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                self.start_dir = self.fp.tell()
        elif mode == 'r':
            self._RealGetContents()
        elif mode in ('w', 'x'):
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
            try:
                self.start_dir = self.fp.tell()
            except (AttributeError, OSError):
                # No usable tell(): count written bytes ourselves.
                self.fp = _Tellable(self.fp)
                self.start_dir = 0
                self._seekable = False
            else:
                # Some file-like objects can provide tell() but not seek()
                try:
                    self.fp.seek(self.start_dir)
                except (AttributeError, OSError):
                    self._seekable = False
        else:
            raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
    except:
        # Deliberately catches everything: release the file, then re-raise.
        failed_fp = self.fp
        self.fp = None
        self._fpclose(failed_fp)
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001305
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is the target."""
    return self
1308
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by calling close().

    The exception details are not inspected and nothing is returned.
    """
    self.close()
1311
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001312 def __repr__(self):
1313 result = ['<%s.%s' % (self.__class__.__module__,
1314 self.__class__.__qualname__)]
1315 if self.fp is not None:
1316 if self._filePassed:
1317 result.append(' file=%r' % self.fp)
1318 elif self.filename is not None:
1319 result.append(' filename=%r' % self.filename)
1320 result.append(' mode=%r' % self.mode)
1321 else:
1322 result.append(' [closed]')
1323 result.append('>')
1324 return ''.join(result)
1325
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record via _EndRecData(), then
    parses every central directory entry into a ZipInfo object.

    Side effects: sets self._comment and self.start_dir, appends each
    parsed entry to self.filelist and registers it in self.NameToInfo.

    Raises:
        BadZipFile: if no end record is found, if the computed start of
            the central directory is negative, or if a directory entry
            is truncated or has a bad signature.
        NotImplementedError: if an entry needs an extract version newer
            than MAX_EXTRACT_VERSION.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir: Position of start of central directory
    self.start_dir = offset_cd + concat
    if self.start_dir < 0:
        # Inconsistent size/offset fields would otherwise reach seek()
        # and surface as an unrelated ValueError/OSError.
        raise BadZipFile("Bad offset for central directory")
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory rather than from the archive file.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if len(centdir) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Index 5 is the general purpose flag field (it is the value
        # unpacked into x.flag_bits below).
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        x._decodeExtra()
        # Shift the stored offset by the amount of data found in front
        # of the archive.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001402
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001403
def namelist(self):
    """Return the member names, in the order of self.filelist."""
    return [zinfo.filename for zinfo in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001407
def infolist(self):
    """Return the list of ZipInfo instances for the archive members.

    The internal list object is handed back as is, not a copy.
    """
    members = self.filelist
    return members
1412
def printdir(self, file=None):
    """Print one header line plus one line per archive member.

    Each member line shows its name, modification time and file size.
    'file' is forwarded to print() unchanged.
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001421
def testzip(self):
    """Read every member in full so that its CRC gets checked.

    Returns the name of the first member whose read raises BadZipFile,
    or None when every member reads cleanly.
    """
    block_size = 2 ** 20
    for member in self.filelist:
        try:
            # Drain the member in bounded chunks instead of one read(),
            # to avoid an OverflowError or a MemoryError with very
            # large embedded files.
            with self.open(member.filename, "r") as stream:
                while True:
                    if not stream.read(block_size):  # Check CRC-32
                        break
        except BadZipFile:
            return member.filename
    return None
1434
def getinfo(self, name):
    """Look up 'name' in the archive and return its ZipInfo instance.

    Raises KeyError when the archive has no member with that name.
    """
    zinfo = self.NameToInfo.get(name)
    if zinfo is not None:
        return zinfo
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001443
def setpassword(self, pwd):
    """Set the default password used for encrypted members.

    A falsy value (None or empty bytes) clears the default.  A truthy
    value that is not bytes raises TypeError.
    """
    if pwd:
        if not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        self.pwd = pwd
    else:
        self.pwd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001452
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    """Store a new archive comment and mark the archive as modified.

    The value must be bytes, otherwise TypeError is raised.  A comment
    longer than ZIP_MAX_COMMENT is truncated to that length after a
    warning is issued.
    """
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
    limit = ZIP_MAX_COMMENT
    if len(comment) > limit:
        # stacklevel=2 attributes the warning to the code that assigned
        # the comment rather than to this setter.
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % limit, stacklevel=2)
        comment = comment[:limit]
    self._comment = comment
    self._didModify = True
1470
def read(self, name, pwd=None):
    """Return the complete contents of member 'name' as bytes.

    'pwd' is passed through to open() for encrypted members.
    """
    with self.open(name, "r", pwd) as member:
        payload = member.read()
    return payload
Guido van Rossumd8faa362007-04-27 19:54:29 +00001475
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Return a file-like object for the archive member 'name'.

    name is either the member's file name (a string) or a ZipInfo
    object describing it.

    mode is 'r' to read a member that is already in the archive, or
    'w' to write a new member.

    pwd is the password for decrypting the member; it must be bytes
    and is only accepted when reading.

    force_zip64 applies to writing: pass it when the size is not known
    in advance but may exceed 2 GiB, so that the ZIP64 format is used.
    When the size is known, it is better to pass a ZipInfo instance as
    name with zinfo.file_size set.
    """
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    if pwd:
        if not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if mode == "w":
            raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    # Resolve 'name' to a ZipInfo object.
    writing = mode == 'w'
    if isinstance(name, ZipInfo):
        zinfo = name
    elif writing:
        # New member: inherit the archive-wide compression settings.
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
    else:
        zinfo = self.getinfo(name)

    if writing:
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                         "is an open writing handle on it. "
                         "Close the writing handle before trying to read.")

    # Reading: take a reference on the underlying file; the _fpclose
    # callback handed to the shared file releases it again.
    self._fileRefCnt += 1
    shared = _SharedFile(self.fp, zinfo.header_offset,
                         self._fpclose, self._lock, lambda: self._writing)
    try:
        # Consume and validate the local file header.
        raw_header = shared.read(sizeFileHeader)
        if len(raw_header) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        header = struct.unpack(structFileHeader, raw_header)
        if header[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fname = shared.read(header[_FH_FILENAME_LENGTH])
        extra_length = header[_FH_EXTRA_FIELD_LENGTH]
        if extra_length:
            # The extra field is read only to move past it.
            shared.read(extra_length)

        flag_bits = zinfo.flag_bits
        if flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        # Flag bit 11 marks a UTF-8 encoded file name.
        encoding = "utf-8" if flag_bits & 0x800 else "cp437"
        fname_str = fname.decode(encoding)

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # A password is only kept for members flagged as encrypted.
        if flag_bits & 0x1:
            pwd = pwd or self.pwd
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)
        else:
            pwd = None

        return ZipExtFile(shared, mode, zinfo, pwd, True)
    except BaseException:
        # Give the shared file back on any failure, then re-raise.
        shared.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001573
def _open_to_write(self, zinfo, force_zip64=False):
    """Write the local header for 'zinfo' and return a writer for its data.

    Raises ValueError when force_zip64 is requested on an archive that
    was opened without allowZip64, or when another write handle is
    already open.  On success the archive is marked as modified and as
    having an open writer.
    """
    if force_zip64 and not self._allowZip64:
        raise ValueError(
            "force_zip64 is True, but allowZip64 was False when opening "
            "the ZIP file."
        )
    if self._writing:
        raise ValueError("Can't write to the ZIP file while there is "
                         "another write handle open on it. "
                         "Close the first handle before opening another.")

    # Size and CRC are overwritten with correct data after processing the file
    zinfo.compress_size = 0
    zinfo.CRC = 0

    flag_bits = 0x00
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        flag_bits |= 0x02
    if not self._seekable:
        flag_bits |= 0x08
    zinfo.flag_bits = flag_bits

    if not zinfo.external_attr:
        zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

    # Compressed size can be larger than uncompressed size, hence the
    # 5% margin on the size check.
    zip64 = self._allowZip64 and (
        force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

    archive = self.fp
    if self._seekable:
        archive.seek(self.start_dir)
    zinfo.header_offset = archive.tell()

    self._writecheck(zinfo)
    self._didModify = True

    archive.write(zinfo.FileHeader(zip64))

    self._writing = True
    return _ZipWriteFile(self, zinfo, zip64)
1614
def extract(self, member, path=None, pwd=None):
    """Extract one member and return what _extract_member() returns.

    'member' may be a file name or a ZipInfo object.  The member is
    extracted under 'path', or under the current working directory
    when 'path' is None.  'pwd' is passed on for encrypted members.
    """
    destination = os.getcwd() if path is None else os.fspath(path)
    return self._extract_member(member, destination, pwd)
1627
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members, by default all of them.

    `path' is the directory to extract to; the current working
    directory is used when it is None.  `members' is optional and must
    be a subset of the list returned by namelist().  `pwd' is passed
    on for encrypted members.
    """
    if members is None:
        members = self.namelist()

    destination = os.getcwd() if path is None else os.fspath(path)

    for member in members:
        self._extract_member(member, destination, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001644
@classmethod
def _sanitize_windows_name(cls, arcname, pathsep):
    """Return 'arcname' with illegal characters and trailing dots removed.

    Each of the characters :<>|"?* becomes an underscore.  The name is
    then split on 'pathsep', trailing dots are stripped from every
    part, and parts that end up empty are dropped before rejoining.
    """
    # The translation table is built once and cached on the class.
    trans = cls._windows_illegal_name_trans_table
    if not trans:
        bad_chars = ':<>|"?*'
        trans = str.maketrans(bad_chars, '_' * len(bad_chars))
        cls._windows_illegal_name_trans_table = trans

    kept = []
    for part in arcname.translate(trans).split(pathsep):
        part = part.rstrip('.')
        if part:
            kept.append(part)
    return pathsep.join(kept)
1659
def _extract_member(self, member, targetpath, pwd):
    """Extract 'member' below the directory 'targetpath'.

    'member' may be a ZipInfo object or a member name, which is
    resolved with getinfo().  The member name is made relative before
    it is joined to 'targetpath': a drive or UNC prefix, empty
    components, "." and ".." are removed, and on Windows illegal
    characters are replaced.  Missing parent directories are created.

    Returns the normalised path of the extracted file or directory.
    """
    if not isinstance(member, ZipInfo):
        member = self.getinfo(member)

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    invalid_path_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                               if x not in invalid_path_parts)
    if os.path.sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, os.path.sep)

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.  exist_ok=True keeps
    # this from failing when something else creates the directory
    # between the existence check and the makedirs() call.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs, exist_ok=True)

    if member.is_dir():
        if not os.path.isdir(targetpath):
            try:
                os.mkdir(targetpath)
            except FileExistsError:
                # Fine if the directory appeared in the meantime; still
                # an error if the path is something other than a directory.
                if not os.path.isdir(targetpath):
                    raise
        return targetpath

    with self.open(member, pwd=pwd) as source, \
         open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1701
def _writecheck(self, zinfo):
    """Validate the archive state and 'zinfo' before a member is written.

    Warns when the member name is already in the archive.  Raises
    ValueError when the archive mode does not allow writing or the
    archive is closed, and LargeZipFile when ZIP64 is not allowed but
    the file count, member size or header offset is over the limit.
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ('w', 'x', 'a'):
        raise ValueError("write() requires mode 'w', 'x', or 'a'")
    if not self.fp:
        raise ValueError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)

    if self._allowZip64:
        return

    # Without ZIP64 the classic format limits apply.
    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        reason = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        reason = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        reason = "Zipfile size"
    else:
        return
    raise LargeZipFile(reason +
                       " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001724
def write(self, filename, arcname=None,
          compress_type=None, compresslevel=None):
    """Add the file or directory 'filename' to the archive.

    The member is stored under 'arcname' (handled by
    ZipInfo.from_file()).  For regular files, 'compress_type' and
    'compresslevel' override the archive-wide settings when they are
    not None.  A directory is recorded as a header-only entry.
    """
    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname,
                              strict_timestamps=self._strict_timestamps)

    if zinfo.is_dir():
        zinfo.compress_size = 0
        zinfo.CRC = 0
        with self._lock:
            if self._seekable:
                self.fp.seek(self.start_dir)
            zinfo.header_offset = self.fp.tell()  # Start of header bytes
            if zinfo.compress_type == ZIP_LZMA:
                # Compressed data includes an end-of-stream (EOS) marker
                zinfo.flag_bits |= 0x02

            self._writecheck(zinfo)
            self._didModify = True

            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            self.fp.write(zinfo.FileHeader(False))
            # The next write starts right after this header.
            self.start_dir = self.fp.tell()
    else:
        zinfo.compress_type = (self.compression if compress_type is None
                               else compress_type)
        zinfo._compresslevel = (self.compresslevel if compresslevel is None
                                else compresslevel)
        # Stream the file into the archive through a write handle.
        with open(filename, "rb") as src:
            with self.open(zinfo, 'w') as dest:
                shutil.copyfileobj(src, dest, 1024*8)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001773
def writestr(self, zinfo_or_arcname, data,
             compress_type=None, compresslevel=None):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When a name is given, a ZipInfo is created for it with the current
    local time and the archive-wide compression settings; a name ending
    in '/' gets directory attributes.  'compress_type' and
    'compresslevel', when not None, override the values on the ZipInfo.

    Raises ValueError if the archive is closed or a write handle is
    already open on it.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
        # endswith() instead of indexing [-1]: an empty name must not
        # raise IndexError here.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    if compresslevel is not None:
        zinfo._compresslevel = compresslevel

    zinfo.file_size = len(data)            # Uncompressed size
    with self._lock:
        with self.open(zinfo, mode='w') as dest:
            dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001814
def __del__(self):
    """Finalizer: call close() in case the user never did."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001818
def close(self):
    """Close the archive; in mode 'w', 'x' or 'a' write the ending
    records first if the archive was modified.

    Does nothing when the archive is already closed.  Raises ValueError
    while a write handle is still open on the archive.
    """
    if self.fp is None:
        return

    if self._writing:
        raise ValueError("Can't close the ZIP file while there is "
                         "an open writing handle on it. "
                         "Close the writing handle before closing the zip.")

    try:
        must_finalize = self.mode in ('w', 'x', 'a') and self._didModify
        if must_finalize:  # write ending records
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        # Detach the file object before releasing it, so the archive
        # counts as closed even if writing the records failed.
        handle = self.fp
        self.fp = None
        self._fpclose(handle)
1840
def _write_end_record(self):
    """Write the central directory and the end-of-archive records.

    One central directory entry is written per item in self.filelist,
    starting at the current position of self.fp.  ZIP64 end records are
    added when the entry count, directory offset or directory size
    exceeds the classic limits; LargeZipFile is raised instead when
    ZIP64 is not allowed.  The archive comment is written last and the
    file is flushed.
    """
    for zinfo in self.filelist:         # write central directory
        stamp = zinfo.date_time
        dosdate = ((stamp[0] - 1980) << 9) | (stamp[1] << 5) | stamp[2]
        dostime = (stamp[3] << 11) | (stamp[4] << 5) | (stamp[5] // 2)

        # Values too large for the fixed-size record are collected for
        # a ZIP64 extra field and replaced by 0xffffffff in the record.
        zip64_values = []
        file_size = zinfo.file_size
        compress_size = zinfo.compress_size
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            zip64_values.append(file_size)
            zip64_values.append(compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff

        header_offset = zinfo.header_offset
        if header_offset > ZIP64_LIMIT:
            zip64_values.append(header_offset)
            header_offset = 0xffffffff

        extra_data = zinfo.extra
        min_version = 0
        if zip64_values:
            # Put a ZIP64 field (header id 1) in front of the extra data.
            # NOTE(review): _strip_extra presumably removes an existing
            # id-1 field first; confirm against its definition.
            stripped = _strip_extra(extra_data, (1,))
            zip64_field = struct.pack(
                '<HH' + 'Q'*len(zip64_values),
                1, 8*len(zip64_values), *zip64_values)
            extra_data = zip64_field + stripped

            min_version = ZIP64_VERSION

        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
        except DeprecationWarning:
            # Dump the values that were being packed, then re-raise.
            print((structCentralDir, stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset), file=sys.stderr)
            raise
        self.fp.write(centdir)
        self.fp.write(filename)
        self.fp.write(extra_data)
        self.fp.write(zinfo.comment)

    dir_end = self.fp.tell()
    # Write end-of-zip-archive record
    entry_count = len(self.filelist)
    dir_size = dir_end - self.start_dir
    dir_offset = self.start_dir
    if entry_count > ZIP_FILECOUNT_LIMIT:
        requires_zip64 = "Files count"
    elif dir_offset > ZIP64_LIMIT:
        requires_zip64 = "Central directory offset"
    elif dir_size > ZIP64_LIMIT:
        requires_zip64 = "Central directory size"
    else:
        requires_zip64 = None
    if requires_zip64:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
        zip64endrec = struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, entry_count, entry_count,
            dir_size, dir_offset)
        self.fp.write(zip64endrec)

        zip64locrec = struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, dir_end, 1)
        self.fp.write(zip64locrec)
        # The classic end record carries capped values in this case.
        entry_count = min(entry_count, 0xFFFF)
        dir_size = min(dir_size, 0xFFFFFFFF)
        dir_offset = min(dir_offset, 0xFFFFFFFF)

    endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, entry_count, entry_count,
                         dir_size, dir_offset, len(self._comment))
    self.fp.write(endrec)
    self.fp.write(self._comment)
    self.fp.flush()
1942
def _fpclose(self, fp):
    """Drop one reference to the underlying file and maybe close it.

    'fp' is closed only when the reference count reaches zero and
    self._filePassed is false.
    """
    assert self._fileRefCnt > 0
    self._fileRefCnt -= 1
    if self._fileRefCnt or self._filePassed:
        return
    fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001948
1949
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages.

    Extends ZipFile with writepy(), which stores Python modules in the
    archive as byte-compiled files, compiling them first when no
    up-to-date compiled file is found.
    """

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember the optimize level.

        optimize selects which compiled file writepy() stores: -1 uses
        whatever up-to-date compiled file is present, while 0, 1 and 2
        request that specific optimization level.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory (it contains __init__.py),
        the package and all of its sub-packages are searched
        recursively for *.py files and stored below basename/<package>.
        If pathname is a plain directory, only the *.py files directly
        inside it are stored.  Otherwise pathname must name a *.py
        file, and that single module is stored.

        Modules are stored as module.pyc, compiled on demand; when
        compilation fails the module.py source is stored instead.

        filterfunc, if given, is called with pathname itself and with
        every *.py file considered; a false result skips that entry.

        Raises RuntimeError if pathname is a file not ending in ".py".
        """
        pathname = os.fspath(pathname)

        def rejected(candidate):
            # True-ish only when a filter exists and it turned candidate down.
            return filterfunc and not filterfunc(candidate)

        def store(source, archive_dir, verb="Adding"):
            # Resolve the file to archive for a *.py path, report, write.
            fname, arcname = self._get_codename(source[0:-3], archive_dir)
            if self.debug:
                print(verb, arcname)
            self.write(fname, arcname)

        def store_unless_rejected(source, archive_dir):
            if rejected(source):
                if self.debug:
                    print('file %r skipped by filterfunc' % source)
                return
            store(source, archive_dir)

        if rejected(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s %r skipped by filterfunc' % (label, pathname))
            return

        if not os.path.isdir(pathname):
            # A single module given by file name.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            store(pathname, basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in sorted(os.listdir(pathname)):
                if os.path.splitext(filename)[1] == ".py":
                    store_unless_rejected(os.path.join(pathname, filename),
                                          basename)
            return

        # This is a package directory: its modules live one level deeper
        # in the archive, under the directory's own name.
        name = os.path.split(pathname)[1]
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        store(initname, basename)

        entries = sorted(os.listdir(pathname))
        entries.remove("__init__.py")   # already stored above
        # Add all *.py files and package subdirectories
        for filename in entries:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                # Only sub-packages are followed; plain subdirectories
                # are ignored.
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    self.writepy(path, basename, filterfunc=filterfunc)
            elif os.path.splitext(filename)[1] == ".py":
                store_unless_rejected(path, basename)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        pathname is a module path without its ".py" suffix.  The
        returned filename is the file on disk to store (a compiled file,
        or the source if compilation failed); archivename is the name
        to store it under, prefixed with basename when that is
        non-empty.  For example, given /python/lib/string the archive
        name is string.pyc.

        Raises ValueError if the optimize level given to the constructor
        is not one of -1, 0, 1 or 2.
        """
        def compile_source(source, optimize=-1):
            # Byte-compile source; report failure instead of raising.
            import py_compile
            if self.debug:
                print("Compiling", source)
            try:
                py_compile.compile(source, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        # __pycache__ file names indexed by optimization level 0, 1, 2.
        cached = (
            importlib.util.cache_from_source(file_py, optimization=''),
            importlib.util.cache_from_source(file_py, optimization=1),
            importlib.util.cache_from_source(file_py, optimization=2),
        )

        def up_to_date(candidate):
            # The compiled file exists and is not older than the source.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        # Compiled files always go into the archive under the legacy
        # module.pyc name, wherever they were found on disk.
        arcname = file_pyc
        if self._optimize == -1:
            # legacy mode: use whatever file is present
            for candidate in (file_pyc,) + cached:
                if up_to_date(candidate):
                    fname = candidate
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if compile_source(file_py):
                    level = sys.flags.optimize
                    fname = cached[level] if level in (0, 1) else cached[2]
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            for level, candidate in enumerate(cached):
                if self._optimize == level:
                    fname = candidate
                    break
            else:
                msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                raise ValueError(msg)
            if not up_to_date(fname):
                if not compile_source(file_py, optimize=self._optimize):
                    fname = arcname = file_py

        archivename = os.path.basename(arcname)
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002122
2123
shireenraoa4e29912019-08-24 11:26:41 -04002124def _unique_everseen(iterable, key=None):
2125 "List unique elements, preserving order. Remember all elements ever seen."
2126 # unique_everseen('AAAABBBCCDAABBB') --> A B C D
2127 # unique_everseen('ABBCcAD', str.lower) --> A B C D
2128 seen = set()
2129 seen_add = seen.add
2130 if key is None:
2131 for element in itertools.filterfalse(seen.__contains__, iterable):
2132 seen_add(element)
2133 yield element
2134 else:
2135 for element in iterable:
2136 k = key(element)
2137 if k not in seen:
2138 seen_add(k)
2139 yield element
2140
2141
def _parents(path):
    """
    Generate every proper parent of a posixpath.sep-separated path,
    nearest parent first.  The path itself is not included.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    lineage = _ancestry(path)
    # The first item of the ancestry is the path itself; drop it.
    return itertools.islice(lineage, 1, None)
2159
2160
2161def _ancestry(path):
2162 """
2163 Given a path with elements separated by
2164 posixpath.sep, generate all elements of that path
2165
2166 >>> list(_ancestry('b/d'))
2167 ['b/d', 'b']
2168 >>> list(_ancestry('/b/d/'))
2169 ['/b/d', '/b']
2170 >>> list(_ancestry('b/d/f/'))
2171 ['b/d/f', 'b/d', 'b']
2172 >>> list(_ancestry('b'))
2173 ['b']
2174 >>> list(_ancestry(''))
2175 []
2176 """
2177 path = path.rstrip(posixpath.sep)
2178 while path and path != posixpath.sep:
2179 yield path
2180 path, tail = posixpath.split(path)
2181
2182
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    # Template used by __repr__; formatted with the instance as "self".
    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """Bind to *root* (a ZipFile, or anything ZipFile() accepts).

        *at* is the entry name inside the archive; the empty string
        denotes the archive root and directory names end with "/".
        """
        self.root = root if isinstance(root, ZipFile) else ZipFile(root)
        self.at = at

    @property
    def open(self):
        """A callable that opens this entry via the archive's open().

        Further arguments given to the callable are passed on to
        ZipFile.open() after the entry name.
        """
        return functools.partial(self.root.open, self.at)

    @property
    def name(self):
        """The final path component, without any trailing slash."""
        return posixpath.basename(self.at.rstrip("/"))

    def read_text(self, *args, **kwargs):
        """Return the entry decoded as text.

        Positional and keyword arguments are passed to io.TextIOWrapper.
        """
        with self.open() as strm:
            return io.TextIOWrapper(strm, *args, **kwargs).read()

    def read_bytes(self):
        """Return the raw bytes of the entry."""
        with self.open() as strm:
            return strm.read()

    def _is_child(self, path):
        """Return True if *path* is located directly inside this path."""
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        """Return a Path for entry *at* in the same archive."""
        return Path(self.root, at)

    def is_dir(self):
        """Return True for the archive root and for names ending in "/"."""
        return not self.at or self.at.endswith("/")

    def is_file(self):
        """Return True if this path names an existing non-directory entry.

        A name that is absent from the archive is not a file, mirroring
        pathlib.  The cheap directory test runs first so that the name
        listing is only built when it is needed.
        """
        return not self.is_dir() and self.exists()

    def exists(self):
        """Return True if the entry (or an implied directory) is listed."""
        return self.at in self._names()

    def iterdir(self):
        """Iterate over the direct children of this directory.

        Raises ValueError if this path is not a directory.
        """
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self._names())
        return filter(self._is_child, subs)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, add):
        """Return the path formed by appending *add* to this one.

        If the joined name is not itself listed but the same name with
        a trailing "/" is, the directory form is returned.
        """
        joined = posixpath.join(self.at, add)
        joined_dir = posixpath.join(self.at, add, "")
        names = self._names()
        if joined not in names and joined_dir in names:
            return self._next(joined_dir)
        return self._next(joined)

    __truediv__ = joinpath

    @staticmethod
    def _implied_dirs(names):
        """Generate the directory names that *names* imply but omit.

        Every parent of every name is considered; those not already
        present in *names* are produced once each, with a trailing "/".
        """
        # Look parents up in a set: testing membership against the list
        # for every parent of every name is quadratic in archive size.
        known = set(names)
        return _unique_everseen(
            parent + "/"
            for name in names
            for parent in _parents(name)
            if parent + "/" not in known
        )

    @classmethod
    def _add_implied_dirs(cls, names):
        """Return *names* followed by the directories it implies."""
        return names + list(cls._implied_dirs(names))

    @property
    def parent(self):
        """The containing directory; the archive root has an empty name."""
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)

    def _names(self):
        """Return the archive's name list, with implied directories added."""
        return self._add_implied_dirs(self.root.namelist())
2327
2328
def main(args=None):
    """Command-line interface: list, extract, create or test an archive.

    args is the argument list handed to argparse; None makes argparse
    read sys.argv.  Exactly one of -l, -e, -c or -t must be given.
    """
    import argparse

    description = 'A simple command-line interface for zipfile module.'
    parser = argparse.ArgumentParser(description=description)
    modes = parser.add_mutually_exclusive_group(required=True)
    modes.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    modes.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    modes.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    modes.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    options = parser.parse_args(args)

    if options.test is not None:
        with ZipFile(options.test, 'r') as archive:
            badfile = archive.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")
        return

    if options.list is not None:
        with ZipFile(options.list, 'r') as archive:
            archive.printdir()
        return

    if options.extract is not None:
        source, target_dir = options.extract
        with ZipFile(source, 'r') as archive:
            archive.extractall(target_dir)
        return

    if options.create is None:
        return

    # First value is the archive to create, the rest are its sources.
    zip_name, *sources = options.create

    def add_to_zip(archive, path, zippath):
        # Files are stored deflated; directories get their own entry
        # (unless they map to the archive root) and are then walked in
        # sorted order.  Anything else is ignored.
        if os.path.isfile(path):
            archive.write(path, zippath, ZIP_DEFLATED)
            return
        if not os.path.isdir(path):
            return
        if zippath:
            archive.write(path, zippath)
        for entry in sorted(os.listdir(path)):
            add_to_zip(archive,
                       os.path.join(path, entry), os.path.join(zippath, entry))

    with ZipFile(zip_name, 'w') as archive:
        for path in sources:
            zippath = os.path.basename(path)
            if not zippath:
                # The path ended with a separator; use the directory's name.
                zippath = os.path.basename(os.path.dirname(path))
            if zippath in ('', os.curdir, os.pardir):
                zippath = ''
            add_to_zip(archive, path, zippath)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002388
if __name__ == "__main__":
    # Executed as a script: run the command-line interface, which reads
    # its arguments from sys.argv when called without any.
    main()