blob: f7a2a2e8b8ab9a91b62df1d8e2dfb92a8ebafa99 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04006import binascii
7import functools
8import importlib.util
Antoine Pitroua32f9a22010-01-27 21:18:57 +00009import io
Miss Islington (bot)c410f382019-08-24 09:03:52 -070010import itertools
Barry Warsaw28a691b2010-04-17 00:19:56 +000011import os
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040012import posixpath
Barry Warsaw28a691b2010-04-17 00:19:56 +000013import shutil
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040014import stat
Barry Warsaw28a691b2010-04-17 00:19:56 +000015import struct
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040016import sys
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020017import threading
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040018import time
Miss Islington (bot)ed4d2632020-02-11 19:21:32 -080019import contextlib
Guido van Rossum32abe6f2000-03-31 17:30:02 +000020
21try:
Tim Peterse1190062001-01-15 03:34:38 +000022 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000023 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040024except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000025 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000026 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000027
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020028try:
29 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040030except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020031 bz2 = None
32
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020033try:
34 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040035except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020036 lzma = None
37
# Public names exported by a star-import of this module.
__all__ = [
    "BadZipFile",
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "ZIP_BZIP2",
    "ZIP_LZMA",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
    "LargeZipFile",
    "Path",
]
Skip Montanaro40fc1602001-03-01 04:27:19 +000042
class BadZipFile(Exception):
    """Error raised for malformed or unsupported ZIP data."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +000045
46
class LargeZipFile(Exception):
    """Signal that ZIP64 extensions are required but disabled.

    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """
52
# Pre-3.2 compatibility names
BadZipfile = BadZipFile
error = BadZipFile
54
Guido van Rossum32abe6f2000-03-31 17:30:02 +000055
# Thresholds of the classic (non-ZIP64) format.
ZIP64_LIMIT = 0x7FFFFFFF            # (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 0xFFFF        # (1 << 16) - 1
ZIP_MAX_COMMENT = 0xFFFF            # (1 << 16) - 1

# constants for Zip file compression methods
# (other ZIP compression methods are not supported)
ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2, ZIP_LZMA = 0, 8, 12, 14

# Values used for the "create version" / "extract version" header fields.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020073
Martin v. Löwisb09b8442008-07-03 14:13:42 +000074# Below are some formats and associated data for reading/writing headers using
75# the struct module. The names and structures of headers/records are those used
76# in the PKWARE description of the ZIP file format:
77# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
78# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000079
Martin v. Löwisb09b8442008-07-03 14:13:42 +000080# The "end of central directory" structure, magic number, size, and indices
81# (section V.I in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +000082structEndArchive = b"<4s4H2LH"
83stringEndArchive = b"PK\005\006"
84sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +000085
86_ECD_SIGNATURE = 0
87_ECD_DISK_NUMBER = 1
88_ECD_DISK_START = 2
89_ECD_ENTRIES_THIS_DISK = 3
90_ECD_ENTRIES_TOTAL = 4
91_ECD_SIZE = 5
92_ECD_OFFSET = 6
93_ECD_COMMENT_SIZE = 7
94# These last two indices are not part of the structure as defined in the
95# spec, but they are used internally by this module as a convenience
96_ECD_COMMENT = 8
97_ECD_LOCATION = 9
98
99# The "central directory" structure, magic number, size, and indices
100# of entries in the structure (section V.F in the format document)
101structCentralDir = "<4s4B4HL2L5H2L"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000102stringCentralDir = b"PK\001\002"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000103sizeCentralDir = struct.calcsize(structCentralDir)
104
Fred Drake3e038e52001-02-28 17:56:26 +0000105# indexes of entries in the central directory structure
106_CD_SIGNATURE = 0
107_CD_CREATE_VERSION = 1
108_CD_CREATE_SYSTEM = 2
109_CD_EXTRACT_VERSION = 3
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000110_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +0000111_CD_FLAG_BITS = 5
112_CD_COMPRESS_TYPE = 6
113_CD_TIME = 7
114_CD_DATE = 8
115_CD_CRC = 9
116_CD_COMPRESSED_SIZE = 10
117_CD_UNCOMPRESSED_SIZE = 11
118_CD_FILENAME_LENGTH = 12
119_CD_EXTRA_FIELD_LENGTH = 13
120_CD_COMMENT_LENGTH = 14
121_CD_DISK_NUMBER_START = 15
122_CD_INTERNAL_FILE_ATTRIBUTES = 16
123_CD_EXTERNAL_FILE_ATTRIBUTES = 17
124_CD_LOCAL_HEADER_OFFSET = 18
125
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000126# The "local file header" structure, magic number, size, and indices
127# (section V.A in the format document)
128structFileHeader = "<4s2B4HL2L2H"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000129stringFileHeader = b"PK\003\004"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000130sizeFileHeader = struct.calcsize(structFileHeader)
131
Fred Drake3e038e52001-02-28 17:56:26 +0000132_FH_SIGNATURE = 0
133_FH_EXTRACT_VERSION = 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000134_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000135_FH_GENERAL_PURPOSE_FLAG_BITS = 3
136_FH_COMPRESSION_METHOD = 4
137_FH_LAST_MOD_TIME = 5
138_FH_LAST_MOD_DATE = 6
139_FH_CRC = 7
140_FH_COMPRESSED_SIZE = 8
141_FH_UNCOMPRESSED_SIZE = 9
142_FH_FILENAME_LENGTH = 10
143_FH_EXTRA_FIELD_LENGTH = 11
144
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000145# The "Zip64 end of central directory locator" structure, magic number, and size
Georg Brandl2ee470f2008-07-16 12:55:28 +0000146structEndArchive64Locator = "<4sLQL"
147stringEndArchive64Locator = b"PK\x06\x07"
148sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000149
150# The "Zip64 end of central directory" record, magic number, size, and indices
151# (section V.G in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000152structEndArchive64 = "<4sQ2H2L4Q"
153stringEndArchive64 = b"PK\x06\x06"
154sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000155
156_CD64_SIGNATURE = 0
157_CD64_DIRECTORY_RECSIZE = 1
158_CD64_CREATE_VERSION = 2
159_CD64_EXTRACT_VERSION = 3
160_CD64_DISK_NUMBER = 4
161_CD64_DISK_NUMBER_START = 5
162_CD64_NUMBER_ENTRIES_THIS_DISK = 6
163_CD64_NUMBER_ENTRIES_TOTAL = 7
164_CD64_DIRECTORY_SIZE = 8
165_CD64_OFFSET_START_CENTDIR = 9
166
Silas Sewell4ba3b502018-09-18 13:00:05 -0400167_DD_SIGNATURE = 0x08074b50
168
Serhiy Storchaka9bdb7be2018-09-17 15:36:40 +0300169_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
170
171def _strip_extra(extra, xids):
172 # Remove Extra Fields with specified IDs.
173 unpack = _EXTRA_FIELD_STRUCT.unpack
174 modified = False
175 buffer = []
176 start = i = 0
177 while i + 4 <= len(extra):
178 xid, xlen = unpack(extra[i : i + 4])
179 j = i + 4 + xlen
180 if xid in xids:
181 if i != start:
182 buffer.append(extra[start : i])
183 start = j
184 modified = True
185 i = j
186 if not modified:
187 return extra
188 return b''.join(buffer)
189
def _check_zipfile(fp):
    """Return True if an "end of central directory" record is found in fp.

    An OSError raised while searching for the record is treated as
    "not a ZIP file" and yields False.
    """
    try:
        endrec = _EndRecData(fp)
    except OSError:
        return False
    # A truthy record means the file has the correct magic number.
    return True if endrec else False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000197
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path, or a file or file-like object
    (anything that has a ``read`` attribute).  Returns False when the
    file cannot be opened or read.
    """
    looks_like_zip = False
    try:
        if not hasattr(filename, "read"):
            # A path was given: open it ourselves and close it afterwards.
            with open(filename, "rb") as stream:
                looks_like_zip = _check_zipfile(stream)
        else:
            looks_like_zip = _check_zipfile(fp=filename)
    except OSError:
        pass
    return looks_like_zip
213
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin is a seekable binary file object.  offset is the position of the
    "end of central directory" record relative to the end of the file
    (it is used with seek whence 2).  endrec is the list built by
    _EndRecData(); it is modified in place and returned.

    endrec is returned untouched when no ZIP64 locator/record is found.
    Raises BadZipFile for archives that span multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator,
                                               locator)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data': the ZIP64 record is then located
    # immediately before the locator.
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    fields = struct.unpack(structEndArchive64, record)
    if fields[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = fields[_CD64_SIGNATURE]
    endrec[_ECD_DISK_NUMBER] = fields[_CD64_DISK_NUMBER]
    endrec[_ECD_DISK_START] = fields[_CD64_DISK_NUMBER_START]
    endrec[_ECD_ENTRIES_THIS_DISK] = fields[_CD64_NUMBER_ENTRIES_THIS_DISK]
    endrec[_ECD_ENTRIES_TOTAL] = fields[_CD64_NUMBER_ENTRIES_TOTAL]
    endrec[_ECD_SIZE] = fields[_CD64_DIRECTORY_SIZE]
    endrec[_ECD_OFFSET] = fields[_CD64_OFFSET_START_CENTDIR]
    return endrec
255
256
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list holding the unpacked fields of the record,
    followed by the archive comment (bytes) and the file offset at which
    the record starts.  The list is passed through _EndRecData64() so the
    values may come from the ZIP64 records instead.  None is returned when
    no valid record can be located.
    """
    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: a ZIP file with no archive comment ends with the "end of
    # central directory" structure, so look at the last bytes first.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    has_plain_record = (len(tail) == sizeEndCentDir
                        and tail[0:4] == stringEndArchive
                        and tail[-2:] == b"\000\000")
    if has_plain_record:
        # the signature is correct and there's no comment: unpack the
        # structure, then append a blank comment and the record start offset
        endrec = [*struct.unpack(structEndArchive, tail),
                  b"",
                  filesize - sizeEndCentDir]

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature.  The comment is the last item in the ZIP file and may
    # be up to 64K long.  It is assumed that the "end of central directory"
    # magic number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    window = fpin.read()
    start = window.rfind(stringEndArchive)
    if start < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    record_end = start + sizeEndCentDir
    recData = window[start:record_end]
    if len(recData) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, recData))
    commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    endrec.append(window[record_end:record_end + commentSize])
    endrec.append(maxCommentStart + start)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, maxCommentStart + start - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000316
Fred Drake484d7352000-10-02 21:14:52 +0000317
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the metadata record for one archive member.

        filename is the member name; it is cut at the first null byte and
        os.sep is replaced by "/".  date_time is a
        (year, month, day, hour, min, sec) sequence.

        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Return a short description; default-valued details are omitted."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The high 16 bits are shown as a file mode, the low 16 bits as raw
        # attribute bits.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        zip64 controls the ZIP64 extra record: None decides from the sizes
        (needed when either exceeds ZIP64_LIMIT), a true value always
        appends the record, and a false value raises LargeZipFile if a size
        exceeds ZIP64_LIMIT.

        As a side effect, extract_version and create_version are raised to
        the minimum version required by ZIP64 and by the compression type.
        """
        dt = self.date_time
        # Pack the timestamp into the 16-bit MS-DOS date and time fields
        # (the time field has a two-second resolution).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # Append a ZIP64 extra record (ID 1) carrying both 64-bit sizes.
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4,
                                        file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        The name is encoded as ASCII when possible; otherwise it is encoded
        as UTF-8 and bit 0x800 is set in the returned flag bits.
        self.flag_bits itself is not modified.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 record found in self.extra, if any.

        The extra data is walked record by record.  For a ZIP64 record
        (ID 0x0001) the 64-bit values replace, in order, file_size,
        compress_size and header_offset, but only for those attributes that
        currently hold the "value is in the ZIP64 record" placeholder.

        Raises BadZipFile if a record is truncated, the ZIP64 record has an
        unexpected size, or it lacks a value that is needed.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                # Index of the next unused value in ``counts``.
                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. File size not found."
                        )
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. Compress size not found."
                        )
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    if len(counts) <= idx:
                        raise BadZipFile(
                            "Corrupt zip64 extra field. Header offset not found."
                        )
                    self.header_offset = counts[idx]
                    idx += 1

            # Advance to the next record.
            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).

        If strict_timestamps is false, a modification time before 1980 is
        replaced by 1980-01-01 00:00:00 and one after 2107 by
        2107-12-31 23:59:59.  If it is true the time is passed on unchanged,
        so the constructor raises ValueError for years before 1980.
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory.

        A member is a directory when its name ends with "/"; an empty name
        is not a directory.
        """
        # endswith() copes with an empty filename, where indexing [-1]
        # would raise IndexError.
        return self.filename.endswith('/')
552
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000553
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300554# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
555# internal keys. We noticed that a direct implementation is faster than
556# relying on binascii.crc32().
Thomas Wouterscf297e42007-02-23 15:07:44 +0000557
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300558_crctable = None
559def _gen_crc(crc):
560 for j in range(8):
561 if crc & 1:
562 crc = (crc >> 1) ^ 0xEDB88320
563 else:
564 crc >>= 1
565 return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000566
# ZIP supports a password-based form of encryption. Even though known
# plaintext attacks have been found against it, it is still useful
# to be able to get data out of such a file.
#
# Usage:
#     zd = _ZipDecrypter(mypwd)
#     plain_bytes = zd(cypher_bytes)

def _ZipDecrypter(pwd):
    """Return a function that decrypts bytes using the password *pwd*.

    pwd is an iterable of integer byte values (e.g. a bytes object).  The
    returned callable keeps its key state between calls, so the chunks of
    one encrypted stream must be passed to it in order.
    """
    # Initial key values.
    key0 = 0x12345678
    key1 = 0x23456789
    key2 = 0x34567890

    # Build the shared CRC table the first time a decrypter is created.
    global _crctable
    if _crctable is None:
        _crctable = [_gen_crc(value) for value in range(256)]
    table = _crctable

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ table[(crc ^ ch) & 0xFF]

    def update_keys(c):
        """Mix one plaintext byte into the three keys."""
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
        key1 = (key1 * 0x08088405 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    for pwd_byte in pwd:
        update_keys(pwd_byte)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        for byte in data:
            k = key2 | 2
            byte ^= ((k * (k ^ 1)) >> 8) & 0xFF
            # The keys are advanced with the decrypted byte.
            update_keys(byte)
            plain.append(byte)
        return bytes(plain)

    return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000611
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200612
class LZMACompressor:
    """Raw LZMA1 compressor whose output starts with a small header.

    The first bytes produced are ``struct.pack('<BBH', 9, 4, len(props))``
    followed by the encoded LZMA1 filter properties; the raw compressed
    stream comes after that.
    """

    def __init__(self):
        # The real compressor is created lazily, on first compress()/flush().
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        lzma1_filter = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[lzma1_filter])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*; the first call also emits the header."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream; emits the header too if nothing was written."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.flush()
634
635
class LZMADecompressor:
    """Raw LZMA1 decompressor for streams that start with a small header.

    The header is four bytes (the last two hold the little-endian length of
    the filter properties) followed by the properties themselves.  Input is
    buffered until more than the complete header has been received.
    """

    def __init__(self):
        self._decomp = None         # created once the header is available
        self._unconsumed = b''      # input buffered while waiting for it
        self.eof = False

    def decompress(self, data):
        """Return the bytes decompressed from *data* (possibly empty)."""
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            psize, = struct.unpack('<H', pending[2:4])
            body_start = 4 + psize
            if len(pending) <= body_start:
                return b''

            lzma1_filter = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[lzma1_filter])
            # Everything after the header is compressed payload.
            data = pending[body_start:]
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
662
663
# Human-readable names for ZIP compression method codes, used when
# reporting a method in messages and reprs.
compressor_names = {
    0: 'store',
    1: 'shrink',
    **dict.fromkeys(range(2, 6), 'reduce'),   # codes 2, 3, 4 and 5
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
683
def _check_compression(compression):
    """Validate that *compression* is a usable compression method.

    Returns None when the method is ZIP_STORED or when the module backing
    the method was imported successfully.

    Raises:
        RuntimeError: the method is known but its module is missing.
        NotImplementedError: the method is none of the four handled here.
    """
    if compression == ZIP_STORED:
        return

    # (method, backing module or None, error when the module is missing)
    requirements = (
        (ZIP_DEFLATED, zlib,
         "Compression requires the (missing) zlib module"),
        (ZIP_BZIP2, bz2,
         "Compression requires the (missing) bz2 module"),
        (ZIP_LZMA, lzma,
         "Compression requires the (missing) lzma module"),
    )
    for method, module, message in requirements:
        if compression == method:
            if not module:
                raise RuntimeError(message)
            return

    raise NotImplementedError("That compression method is not supported")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200701
702
def _get_compressor(compress_type, compresslevel=None):
    """Return a new compressor object for *compress_type*, or None.

    *compresslevel* is forwarded to the zlib and bz2 compressors when it is
    not None. None is returned for any method other than ZIP_DEFLATED,
    ZIP_BZIP2 and ZIP_LZMA.
    """
    if compress_type == ZIP_DEFLATED:
        if compresslevel is None:
            level = zlib.Z_DEFAULT_COMPRESSION
        else:
            level = compresslevel
        # wbits of -15 is passed through to zlib unchanged.
        return zlib.compressobj(level, zlib.DEFLATED, -15)

    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)

    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()

    return None
717
718
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    Returns None for ZIP_STORED, since stored data needs no decompression.

    Raises:
        NotImplementedError: *compress_type* is not one of the supported
            methods; the message names the method when it is a known one.
        RuntimeError: the method is supported but the module implementing
            it is missing (raised by _check_compression()).
    """
    if compress_type not in (ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2, ZIP_LZMA):
        # Report unsupported methods here, before _check_compression() can
        # raise its generic error, so the message identifies the method.
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError(
                "compression type %d (%s)" % (compress_type, descr))
        raise NotImplementedError("compression type %d" % (compress_type,))

    # Supported method: make sure the module backing it is available.
    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    return LZMADecompressor()
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200735
736
class _SharedFile:
    """A handle on a file object that is shared between several users.

    Each handle remembers its own position (``_pos``) and repositions the
    underlying file before every read, so handles sharing one file do not
    disturb each other. All access to the file is done under *lock*.

    Arguments:
        file: the underlying file object.
        pos: initial position of this handle within *file*.
        close: callable invoked with *file* when this handle is closed.
        lock: context manager guarding access to *file*.
        writing: callable returning true while a writing handle is open.
    """

    def __init__(self, file, pos, close, lock, writing):
        self._file = file
        self._pos = pos
        self._close = close
        self._lock = lock
        self._writing = writing
        self.seekable = file.seekable

    def tell(self):
        """Return this handle's own position.

        The underlying file's position cannot be used: it reflects whatever
        handle touched the shared file last.
        """
        return self._pos

    def seek(self, offset, whence=0):
        """Move this handle's position and return the new position.

        Raises ValueError while a writing handle is open on the file.
        """
        with self._lock:
            if self._writing():
                raise ValueError("Can't reposition in the ZIP file while "
                        "there is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            if whence == os.SEEK_CUR:
                # "Current" means this handle's position, not the position
                # another handle left the shared file at.
                self._file.seek(self._pos + offset)
            else:
                self._file.seek(offset, whence)
            self._pos = self._file.tell()
            return self._pos

    def read(self, n=-1):
        """Read up to *n* bytes starting at this handle's position.

        Raises ValueError while a writing handle is open on the file.
        """
        with self._lock:
            if self._writing():
                raise ValueError("Can't read from the ZIP file while there "
                        "is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            self._file.seek(self._pos)
            data = self._file.read(n)
            self._pos = self._file.tell()
            return data

    def close(self):
        """Release the underlying file through the close callback, once."""
        if self._file is not None:
            fileobj = self._file
            self._file = None
            self._close(fileobj)
773
# Provide the tell method for unseekable stream
class _Tellable:
    """Write-through wrapper that tracks how many bytes were written.

    tell() reports the running total of the values returned by the wrapped
    object's write(), so it never calls tell() on the wrapped object.
    """

    def __init__(self, fp):
        self.fp = fp
        self.offset = 0    # total reported by fp.write() so far

    def write(self, data):
        """Write *data* to the wrapped object and return its result."""
        written = self.fp.write(data)
        self.offset += written
        return written

    def tell(self):
        """Return the number of bytes written through this wrapper."""
        return self.offset

    def flush(self):
        """Flush the wrapped object."""
        self.fp.flush()

    def close(self):
        """Close the wrapped object."""
        self.fp.close()
793
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200794
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # Parenthesised on purpose: "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, pwd=None,
                 close_fileobj=False):
        """Set up reading of the member described by *zipinfo*.

        *fileobj* must be positioned at the start of the member's data.
        *pwd*, when true, is used to decrypt the data. If *close_fileobj*
        is true, *fileobj* is closed together with this object.

        Raises RuntimeError if the password check byte does not match.
        """
        self._fileobj = fileobj
        self._pwd = pwd
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size   # compressed bytes unread
        self._left = zipinfo.file_size                # plain bytes unreturned

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0          # read position inside _readbuffer

        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        self._seekable = False
        try:
            if fileobj.seekable():
                # Remember the initial state so seek() can rewind to it.
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            # A missing attribute on fileobj, or no running CRC, leaves the
            # stream marked as not seekable.
            pass

        self._decrypter = None
        if pwd:
            if zipinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zipinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zipinfo.CRC >> 24) & 0xff
            h = self._init_decrypter()
            if h != check_byte:
                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)

    def _init_decrypter(self):
        """Create the decrypter, consume the header, return its last byte."""
        self._decrypter = _ZipDecrypter(self._pwd)
        # The first 12 bytes in the cypher stream is an encryption header
        #  used to strengthen the algorithm. The first 11 bytes are
        #  completely random, while the 12th contains the MSB of the CRC,
        #  or the MSB of the file time depending on the header type
        #  and is used to check the correctness of the password.
        header = self._fileobj.read(12)
        self._compress_left -= 12
        return self._decrypter(header)[11]

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        A limit of None is treated like a negative limit (no limit).
        """

        if limit is None or limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Put the bytes just read back in front of the read position.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than requested: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n=-1):
        """Read up to n bytes with at most one read() system call.

        If n is omitted, None, or negative, return the buffered bytes plus
        the result of the first underlying read that yields data.
        """

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never return more than the member's declared size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read (and decrypt) a chunk of at least MIN_READ_SIZE bytes of raw
        # member data, capped at the compressed bytes that remain.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        return self._seekable

    def seek(self, offset, whence=0):
        """Move to a position in the uncompressed data; return the position.

        The target is clamped to the range [0, file size]. Seeking backwards
        outside the buffered data restarts reading from the beginning of the
        member; seeking forwards reads and discards data.
        """
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        # Advance by reading and discarding, in bounded chunks.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        """Return the current position in the uncompressed data."""
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos
1112
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001113
class _ZipWriteFile(io.BufferedIOBase):
    """Writable file object for adding one member to an open ZipFile.

    Data passed to write() is checksummed, optionally compressed, and
    written to the archive's file object. close() records the final sizes
    and CRC on the ZipInfo and registers the member with the archive.
    """

    def __init__(self, zf, zinfo, zip64):
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0        # uncompressed bytes accepted so far
        self._compress_size = 0    # compressed bytes produced so far
        self._crc = 0

    @property
    def _fileobj(self):
        # Looked up on every access rather than cached at construction.
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Write *data* to the member; return the number of bytes accepted.

        *data* may be any object supporting the buffer protocol.

        Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError('I/O operation on closed file.')

        # len() of a buffer with multi-byte items (e.g. array.array('H'))
        # counts items, not bytes, so take the byte count from a memoryview.
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes

        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        """Finish the member: flush, fix up the header, register the entry.

        The archive's ``_writing`` flag is cleared even if an error occurs.
        """
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'File size unexpectedly exceeded ZIP64 limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'Compressed size unexpectedly exceeded ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            self._zipfile._writing = False
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001189
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001190
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001191
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001192class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001193 """ Class with methods to open, read, write, close, list zip files.
1194
Bo Baylesce237c72018-01-29 23:54:07 -06001195 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1196 compresslevel=None)
Tim Petersa19a1682001-03-29 04:36:09 +00001197
Fred Drake3d9091e2001-03-26 15:49:24 +00001198 file: Either the path to the file, or a file-like object.
1199 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001200 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1201 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001202 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1203 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001204 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1205 needed, otherwise it will raise an exception when this would
1206 be necessary.
Bo Baylesce237c72018-01-29 23:54:07 -06001207 compresslevel: None (default for the given compression type) or an integer
1208 specifying the level to pass to the compressor.
1209 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1210 When using ZIP_DEFLATED integers 0 through 9 are accepted.
1211 When using ZIP_BZIP2 integers 1 through 9 are accepted.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001212
Fred Drake3d9091e2001-03-26 15:49:24 +00001213 """
Fred Drake484d7352000-10-02 21:14:52 +00001214
Fred Drake90eac282001-02-28 05:29:34 +00001215 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001216 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001217
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
             compresslevel=None, *, strict_timestamps=True):
    """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
    or append 'a'.

    *file* is a path (str or os.PathLike), which is opened here, or an
    already open file-like object, which is used as given. If setting up
    the archive fails, the file is released through _fpclose() and the
    exception is re-raised.
    """
    if mode not in ('r', 'w', 'x', 'a'):
        raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.compresslevel = compresslevel
    self.mode = mode
    self.pwd = None
    self._comment = b''
    self._strict_timestamps = strict_timestamps

    # Check if we were passed a file-like object
    if isinstance(file, os.PathLike):
        file = os.fspath(file)
    if not isinstance(file, str):
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    else:
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        # Maps a ZipFile mode to the first open() mode to try, and each
        # open() mode to the one to fall back to when it raises OSError.
        fallback_modes = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                          'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
        filemode = fallback_modes[mode]
        while True:
            try:
                self.fp = io.open(file, filemode)
                break
            except OSError:
                if filemode not in fallback_modes:
                    raise
                filemode = fallback_modes[filemode]
    self._fileRefCnt = 1
    self._lock = threading.RLock()
    self._seekable = True
    self._writing = False

    try:
        if mode == 'r':
            self._RealGetContents()
        elif mode in ('w', 'x'):
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
            try:
                self.start_dir = self.fp.tell()
            except (AttributeError, OSError):
                # No usable tell(): count written bytes ourselves.
                self.fp = _Tellable(self.fp)
                self.start_dir = 0
                self._seekable = False
            else:
                # Some file-like objects can provide tell() but not seek()
                try:
                    self.fp.seek(self.start_dir)
                except (AttributeError, OSError):
                    self._seekable = False
        elif mode == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                self.start_dir = self.fp.tell()
        else:
            raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
    except BaseException:
        # Release the file on any failure, then let the error propagate.
        opened = self.fp
        self.fp = None
        self._fpclose(opened)
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001307
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is the target."""
    return self
1310
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    Returns None, so an exception raised inside the block propagates.
    """
    self.close()
1313
def __repr__(self):
    """Return '<module.Class ...>' describing the archive's state.

    An open archive shows the passed file object or the filename, then the
    mode; an archive whose ``fp`` is None shows '[closed]'.
    """
    cls = self.__class__
    parts = ['<%s.%s' % (cls.__module__, cls.__qualname__)]
    if self.fp is None:
        parts.append(' [closed]')
    else:
        if self._filePassed:
            parts.append(' file=%r' % self.fp)
        elif self.filename is not None:
            parts.append(' filename=%r' % self.filename)
        parts.append(' mode=%r' % self.mode)
    parts.append('>')
    return ''.join(parts)
1327
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record through _EndRecData(),
    stores the archive comment in self._comment and the position of the
    central directory in self.start_dir, then parses every central
    directory entry into a ZipInfo that is appended to self.filelist and
    registered in self.NameToInfo under its filename.

    Raises:
        BadZipFile: if no end record is found, the computed central
            directory position is negative, or an entry is truncated or
            carries the wrong signature.
        NotImplementedError: if an entry needs an extract version newer
            than MAX_EXTRACT_VERSION.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    if self.start_dir < 0:
        # Inconsistent size/offset fields would otherwise reach seek() and
        # surface as an unrelated ValueError/OSError.
        raise BadZipFile("Bad offset for central directory")
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory rather than from the archive file.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if len(centdir) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Index 5 is the same field unpacked into x.flag_bits below.
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                        t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        # Shift the stored offset by any data that precedes the archive.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001404
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001405
def namelist(self):
    """Return a new list with the filename of every entry in self.filelist."""
    names = []
    for info in self.filelist:
        names.append(info.filename)
    return names
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001409
def infolist(self):
    """Return the list of ZipInfo instances for the archive members.

    The returned object is self.filelist itself, not a copy.
    """
    members = self.filelist
    return members
1414
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header row is written, followed by one row per entry of
    self.filelist giving its name, modification time and size.  Output goes
    to `file`, which is handed straight to print().
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001423
1424 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001425 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001426 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001427 for zinfo in self.filelist:
1428 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001429 # Read by chunks, to avoid an OverflowError or a
1430 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001431 with self.open(zinfo.filename, "r") as f:
1432 while f.read(chunk_size): # Check CRC-32
1433 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001434 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001435 return zinfo.filename
1436
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when self.NameToInfo has no entry for `name`.
    """
    info = self.NameToInfo.get(name)
    if info is not None:
        return info
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001445
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A falsy `pwd` clears the default (self.pwd becomes None); a truthy
    value that is not bytes raises TypeError.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001454
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Only bytes are accepted; anything else is rejected up front.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
    # Over-long comments are cut to ZIP_MAX_COMMENT bytes with a warning.
    too_long = len(comment) > ZIP_MAX_COMMENT
    if too_long:
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT, stacklevel=2)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    # Flag the archive as modified; close() consults this flag.
    self._didModify = True
1472
def read(self, name, pwd=None):
    """Return file bytes for name.

    The member is opened through self.open() in "r" mode with `pwd` passed
    along, read in full, and closed again before returning.
    """
    with self.open(name, "r", pwd) as member:
        payload = member.read()
    return payload
Guido van Rossumd8faa362007-04-27 19:54:29 +00001477
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Return file-like object for 'name'.

    name is a string for the file name within the ZIP file, or a ZipInfo
    object.

    mode should be 'r' to read a file already in the ZIP file, or 'w' to
    write to a file newly added to the archive.

    pwd is the password to decrypt files (only used for reading).

    When writing, if the file size is not known in advance but may exceed
    2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
    files.  If the size is known in advance, it is best to pass a ZipInfo
    instance for name, with zinfo.file_size set.

    Raises ValueError for an unsupported mode, a password combined with
    mode 'w', a closed archive, or a read attempted while a write handle
    is open; TypeError for a non-bytes password; BadZipFile when the local
    file header is truncated, has the wrong signature, or names a
    different file than the directory entry; NotImplementedError for
    patched data or strong encryption; RuntimeError when an encrypted
    member has no password available.
    """
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    if pwd and (mode == "w"):
        raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    writing = mode == 'w'

    # Make sure we have an info object
    if isinstance(name, ZipInfo):
        # 'name' is already an info object
        zinfo = name
    elif writing:
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
    else:
        # Get info object for name
        zinfo = self.getinfo(name)

    if writing:
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                         "is an open writing handle on it. "
                         "Close the writing handle before trying to read.")

    # Open for reading:
    self._fileRefCnt += 1
    zef_file = _SharedFile(self.fp, zinfo.header_offset,
                           self._fpclose, self._lock, lambda: self._writing)
    try:
        # Skip the file header:
        raw_header = zef_file.read(sizeFileHeader)
        if len(raw_header) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, raw_header)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        extra_length = fheader[_FH_EXTRA_FIELD_LENGTH]
        if extra_length:
            # Contents are discarded; the read only advances past them.
            zef_file.read(extra_length)

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        # Flag bit 0x800 marks a UTF-8 filename; otherwise cp437 is used.
        encoding = "utf-8" if zinfo.flag_bits & 0x800 else "cp437"
        fname_str = fname.decode(encoding)

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        if zinfo.flag_bits & 0x1:
            # Fall back to the archive-wide default password.
            pwd = pwd or self.pwd
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)
        else:
            pwd = None

        return ZipExtFile(zef_file, mode, zinfo, pwd, True)
    except BaseException:
        # Release the shared handle on any failure, then propagate.
        zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001575
def _open_to_write(self, zinfo, force_zip64=False):
    """Write the local header for `zinfo` and return a handle for its data.

    Resets the size/CRC/flag fields of `zinfo`, positions a seekable
    archive at self.start_dir, records the header offset, writes the file
    header and marks the archive as having an open writer.

    Raises ValueError when force_zip64 is requested on an archive opened
    without allowZip64, or when another write handle is already open.
    """
    if force_zip64 and not self._allowZip64:
        raise ValueError(
            "force_zip64 is True, but allowZip64 was False when opening "
            "the ZIP file."
        )
    if self._writing:
        raise ValueError("Can't write to the ZIP file while there is "
                         "another write handle open on it. "
                         "Close the first handle before opening another.")

    # Sizes and CRC are overwritten with correct data after processing the file
    if not hasattr(zinfo, 'file_size'):
        zinfo.file_size = 0
    zinfo.compress_size = 0
    zinfo.CRC = 0

    flag_bits = 0x00
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        flag_bits |= 0x02
    if not self._seekable:
        flag_bits |= 0x08
    zinfo.flag_bits = flag_bits

    if not zinfo.external_attr:
        zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

    # Compressed size can be larger than uncompressed size
    zip64 = self._allowZip64 and (
        force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

    if self._seekable:
        self.fp.seek(self.start_dir)
    zinfo.header_offset = self.fp.tell()

    self._writecheck(zinfo)
    self._didModify = True

    self.fp.write(zinfo.FileHeader(zip64))

    self._writing = True
    return _ZipWriteFile(self, zinfo, zip64)
1618
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
    using its full name. Its file information is extracted as accurately
    as possible. `member' may be a filename or a ZipInfo object. You can
    specify a different directory using `path'.

    Returns whatever self._extract_member() returns for the member.
    """
    destination = os.getcwd() if path is None else os.fspath(path)
    return self._extract_member(member, destination, pwd)
1631
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
    directory. `path' specifies a different directory to extract to.
    `members' is optional and must be a subset of the list returned
    by namelist().
    """
    selected = self.namelist() if members is None else members
    destination = os.getcwd() if path is None else os.fspath(path)

    for zipinfo in selected:
        self._extract_member(zipinfo, destination, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001648
@classmethod
def _sanitize_windows_name(cls, arcname, pathsep):
    """Replace bad characters and remove trailing dots from parts.

    The translation table is built on first use and cached on the class
    as _windows_illegal_name_trans_table.
    """
    table = cls._windows_illegal_name_trans_table
    if not table:
        illegal = ':<>|"?*'
        table = str.maketrans(illegal, '_' * len(illegal))
        cls._windows_illegal_name_trans_table = table
    translated = arcname.translate(table)
    # remove trailing dots
    parts = [part.rstrip('.') for part in translated.split(pathsep)]
    # rejoin, removing empty parts.
    return pathsep.join(part for part in parts if part)
1663
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
    file on the path targetpath.

    `member` may also be a name, which is resolved through getinfo().
    The member's archive name is made relative and stripped of drive,
    empty, "." and ".." components before being joined onto `targetpath`.
    Missing parent directories are created.  Returns the normalized path
    that was written (or, for a directory member, created).

    Directory creation tolerates the directory appearing between the
    existence check and the create call, so several extractions can
    target the same tree concurrently.
    """
    if not isinstance(member, ZipInfo):
        member = self.getinfo(member)

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    invalid_path_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                               if x not in invalid_path_parts)
    if os.path.sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, os.path.sep)

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        # exist_ok covers another extractor creating it after the check.
        os.makedirs(upperdirs, exist_ok=True)

    if member.is_dir():
        if not os.path.isdir(targetpath):
            try:
                os.mkdir(targetpath)
            except FileExistsError:
                # Lost a race with another creator: fine if it is now a
                # directory, a genuine error if something else is there.
                if not os.path.isdir(targetpath):
                    raise
        return targetpath

    with self.open(member, pwd=pwd) as source, \
         open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1705
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Warns about a duplicate member name, then raises ValueError if the
    archive is not in a writable mode or is closed, validates the
    compression type via _check_compression(), and raises LargeZipFile
    when ZIP64 is disabled but a limit would be exceeded.
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ('w', 'x', 'a'):
        raise ValueError("write() requires mode 'w', 'x', or 'a'")
    if not self.fp:
        raise ValueError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)
    if self._allowZip64:
        return

    # Without ZIP64, report the first limit that would be exceeded.
    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        reason = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        reason = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        reason = "Zipfile size"
    else:
        return
    raise LargeZipFile(reason +
                       " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001728
def write(self, filename, arcname=None,
          compress_type=None, compresslevel=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    compress_type and compresslevel override the archive defaults for
    this member when given.  A directory is stored as a header-only
    entry.  Raises ValueError if the archive is closed or a write handle
    is currently open.
    """
    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname,
                              strict_timestamps=self._strict_timestamps)

    if not zinfo.is_dir():
        # Regular file: settle compression settings, then stream the data.
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        if compresslevel is None:
            zinfo._compresslevel = self.compresslevel
        else:
            zinfo._compresslevel = compresslevel

        with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
            shutil.copyfileobj(src, dest, 1024*8)
        return

    # Directory: there is no payload, only a header is written.
    zinfo.compress_size = 0
    zinfo.CRC = 0
    with self._lock:
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        self.start_dir = self.fp.tell()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001777
def writestr(self, zinfo_or_arcname, data,
             compress_type=None, compresslevel=None):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When a name is given, a ZipInfo is built for it with the current local
    time and the archive's default compression settings; a name ending in
    '/' gets directory attributes.  compress_type and compresslevel, when
    given, override the settings on the ZipInfo that is used.

    Raises ValueError if the archive is closed or a write handle is
    currently open.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
        # endswith() rather than [-1] so that an empty archive name does
        # not fail with an unrelated IndexError.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    if compresslevel is not None:
        zinfo._compresslevel = compresslevel

    zinfo.file_size = len(data)            # Uncompressed size
    with self._lock:
        with self.open(zinfo, mode='w') as dest:
            dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001818
def __del__(self):
    """Finalizer: call close() in case the user forgot to."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001822
def close(self):
    """Close the file, and for mode 'w', 'x' and 'a' write the ending
    records.

    Does nothing when the archive is already closed.  Raises ValueError
    while a write handle is still open.  The underlying file is released
    through _fpclose() even if writing the ending records fails.
    """
    if self.fp is None:
        return

    if self._writing:
        raise ValueError("Can't close the ZIP file while there is "
                         "an open writing handle on it. "
                         "Close the writing handle before closing the zip.")

    try:
        needs_end_record = self.mode in ('w', 'x', 'a') and self._didModify
        if needs_end_record:  # write ending records
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        # Detach the handle first so the archive reads as closed.
        fp, self.fp = self.fp, None
        self._fpclose(fp)
1844
def _write_end_record(self):
    """Write the central directory and end-of-archive records to self.fp.

    One central directory entry is written per item of self.filelist at
    the current file position, followed by the ZIP64 end records when a
    count/offset/size limit is exceeded, and finally the end-of-archive
    record and the archive comment.

    In append mode the file is truncated after the comment, so that a
    rewritten tail shorter than the previous one does not leave stale
    bytes behind the new end record.

    Raises LargeZipFile when ZIP64 records are required but the archive
    was opened without allowZip64.
    """
    for zinfo in self.filelist:         # write central directory
        dt = zinfo.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        # Values that overflow their 32-bit fields go into a ZIP64 extra
        # field; the fixed fields then hold the 0xffffffff marker.
        extra = []
        if zinfo.file_size > ZIP64_LIMIT \
           or zinfo.compress_size > ZIP64_LIMIT:
            extra.append(zinfo.file_size)
            extra.append(zinfo.compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
        else:
            file_size = zinfo.file_size
            compress_size = zinfo.compress_size

        if zinfo.header_offset > ZIP64_LIMIT:
            extra.append(zinfo.header_offset)
            header_offset = 0xffffffff
        else:
            header_offset = zinfo.header_offset

        extra_data = zinfo.extra
        min_version = 0
        if extra:
            # Append a ZIP64 field to the extra's
            extra_data = _strip_extra(extra_data, (1,))
            extra_data = struct.pack(
                '<HH' + 'Q'*len(extra),
                1, 8*len(extra), *extra) + extra_data

            min_version = ZIP64_VERSION

        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
        except DeprecationWarning:
            # Dump the values being packed to stderr before re-raising.
            print((structCentralDir, stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset), file=sys.stderr)
            raise
        self.fp.write(centdir)
        self.fp.write(filename)
        self.fp.write(extra_data)
        self.fp.write(zinfo.comment)

    pos2 = self.fp.tell()
    # Write end-of-zip-archive record
    centDirCount = len(self.filelist)
    centDirSize = pos2 - self.start_dir
    centDirOffset = self.start_dir
    requires_zip64 = None
    if centDirCount > ZIP_FILECOUNT_LIMIT:
        requires_zip64 = "Files count"
    elif centDirOffset > ZIP64_LIMIT:
        requires_zip64 = "Central directory offset"
    elif centDirSize > ZIP64_LIMIT:
        requires_zip64 = "Central directory size"
    if requires_zip64:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
        zip64endrec = struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, centDirCount, centDirCount,
            centDirSize, centDirOffset)
        self.fp.write(zip64endrec)

        zip64locrec = struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, pos2, 1)
        self.fp.write(zip64locrec)
        # Clamp to the field maxima of the classic end record.
        centDirCount = min(centDirCount, 0xFFFF)
        centDirSize = min(centDirSize, 0xFFFFFFFF)
        centDirOffset = min(centDirOffset, 0xFFFFFFFF)

    endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, centDirCount, centDirCount,
                         centDirSize, centDirOffset, len(self._comment))
    self.fp.write(endrec)
    self.fp.write(self._comment)
    if self.mode == "a":
        # Drop leftovers of a longer tail that was previously stored here.
        self.fp.truncate()
    self.fp.flush()
1946
def _fpclose(self, fp):
    """Drop one reference to the archive file; close it on the last one.

    `fp` is closed only when the reference count reaches zero and
    self._filePassed is false.
    """
    assert self._fileRefCnt > 0
    self._fileRefCnt -= 1
    if self._fileRefCnt or self._filePassed:
        return
    fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001952
1953
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        Three kinds of pathname are handled:

        * a package directory (it contains __init__.py): the package and
          all of its package subdirectories are searched recursively for
          *.py files, which are entered under "basename/<package>";
        * a plain directory: the *.py files it lists are entered;
        * anything else: it must be a *.py file and that single module
          is entered.

        Added modules are always module.pyc; module.py is compiled into
        module.pyc when necessary.
        If filterfunc is given, it is called with each path before it is
        added.  When it returns a false value, the file or directory is
        skipped.
        """
        pathname = os.fspath(pathname)

        def add_module(source_path, prefix):
            # Shared by both directory walks: filter, compile, then store
            # a single *.py file.
            if filterfunc and not filterfunc(source_path):
                if self.debug:
                    print('file %r skipped by filterfunc' % source_path)
                return
            fname, arcname = self._get_codename(source_path[:-3], prefix)
            if self.debug:
                print("Adding", arcname)
            self.write(fname, arcname)

        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s %r skipped by filterfunc' % (label, pathname))
            return

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in sorted(os.listdir(pathname)):
                if os.path.splitext(filename)[1] == ".py":
                    add_module(os.path.join(pathname, filename), basename)
            return

        # This is a package directory, add it
        name = os.path.split(pathname)[1]
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)

        entries = sorted(os.listdir(pathname))
        entries.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in entries:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # Nested package: recurse with the extended basename.
                    self.writepy(path, basename, filterfunc=filterfunc)
            elif os.path.splitext(filename)[1] == ".py":
                add_module(path, basename)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).
        When compilation fails, the *.py source itself is returned.
        """
        def compile_source(file, optimize=-1):
            # Returns True on success; on a compile error the message is
            # printed and False is returned so the caller can fall back.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        # __pycache__ file names, keyed by optimization level.
        pycache = {
            0: importlib.util.cache_from_source(file_py, optimization=''),
            1: importlib.util.cache_from_source(file_py, optimization=1),
            2: importlib.util.cache_from_source(file_py, optimization=2),
        }

        def is_fresh(candidate):
            # True when candidate exists and is at least as new as the source.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        # Every successful path stores the module under the legacy pyc name.
        arcname = file_pyc
        if self._optimize == -1:
            # legacy mode: use whatever file is present, preferring the
            # legacy .pyc and then the __pycache__ files by level.
            for candidate in (file_pyc, pycache[0], pycache[1], pycache[2]):
                if is_fresh(candidate):
                    fname = candidate
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if compile_source(file_py):
                    fname = pycache.get(sys.flags.optimize, pycache[2])
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            for level in (0, 1, 2):
                if self._optimize == level:
                    fname = pycache[level]
                    break
            else:
                msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                raise ValueError(msg)
            if not is_fresh(fname):
                if not compile_source(file_py, optimize=self._optimize):
                    fname = arcname = file_py

        archivename = os.path.basename(arcname)
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002126
2127
def _parents(path):
    """
    Generate every ancestor of a posixpath.sep-separated
    path, nearest first, excluding the path itself.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    # The first item produced by _ancestry is the path itself; skip it.
    lineage = _ancestry(path)
    return itertools.islice(lineage, 1, None)
2145
2146
2147def _ancestry(path):
2148 """
2149 Given a path with elements separated by
2150 posixpath.sep, generate all elements of that path
2151
2152 >>> list(_ancestry('b/d'))
2153 ['b/d', 'b']
2154 >>> list(_ancestry('/b/d/'))
2155 ['/b/d', '/b']
2156 >>> list(_ancestry('b/d/f/'))
2157 ['b/d/f', 'b/d', 'b']
2158 >>> list(_ancestry('b'))
2159 ['b']
2160 >>> list(_ancestry(''))
2161 []
2162 """
2163 path = path.rstrip(posixpath.sep)
2164 while path and path != posixpath.sep:
2165 yield path
2166 path, tail = posixpath.split(path)
2167
2168
# Alias for dict.fromkeys: the items of the iterable become the keys of a
# new dict, so duplicates collapse and iterating the result yields each
# item once, in first-seen order.  Note the result is a dict, not a list.
_dedupe = dict.fromkeys
"""Deduplicate an iterable in original order"""
2171
2172
2173def _difference(minuend, subtrahend):
2174 """
2175 Return items in minuend not in subtrahend, retaining order
2176 with O(1) lookup.
2177 """
2178 return itertools.filterfalse(set(subtrahend).__contains__, minuend)
2179
2180
class CompleteDirs(ZipFile):
    """
    A ZipFile subclass whose namelist always contains the
    directories implied by its members, even when the archive
    has no explicit entries for them.
    """

    @staticmethod
    def _implied_dirs(names):
        # Every parent of every name, spelled as a directory (trailing
        # separator), minus the ones already listed, without duplicates.
        implied = (
            parent + posixpath.sep
            for name in names
            for parent in _parents(name)
        )
        return _dedupe(_difference(implied, names))

    def namelist(self):
        names = super().namelist()
        return names + [*self._implied_dirs(names)]

    def _name_set(self):
        return set(self.namelist())

    def resolve_dir(self, name):
        """
        Return name with a trailing slash when it is only
        known as a directory; otherwise return name unchanged.
        """
        known = self._name_set()
        as_dir = name + '/'
        if name in known:
            return name
        return as_dir if as_dir in known else name

    @classmethod
    def make(cls, source):
        """
        Given a source (filename or zipfile), return an
        appropriate CompleteDirs subclass.
        """
        if isinstance(source, CompleteDirs):
            return source

        if not isinstance(source, ZipFile):
            return cls(source)

        # Only allow for FastPath when supplied zipfile is read-only
        target_cls = cls if 'r' in source.mode else CompleteDirs

        # Re-type the existing ZipFile by copying its state onto a new
        # instance created without running __init__.
        clone = target_cls.__new__(target_cls)
        clone.__dict__.update(source.__dict__)
        return clone
2229
2230
class FastLookup(CompleteDirs):
    """
    ZipFile subclass to ensure implicit
    dirs exist and are resolved rapidly.

    The name list and the name set are computed on first
    use and then reused from instance attributes.
    """
    def namelist(self):
        try:
            return self.__names
        except AttributeError:
            # Not cached yet; compute below.
            pass
        self.__names = super().namelist()
        return self.__names

    def _name_set(self):
        try:
            return self.__lookup
        except AttributeError:
            # Not cached yet; compute below.
            pass
        self.__lookup = super()._name_set()
        return self.__lookup
2247
2248
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    A name that is not in the archive is neither a file nor a directory:

    >>> (b / 'missing.txt').is_file()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """Wrap *root* (a filename or zipfile) at the member name *at*.

        An empty *at* designates the top of the archive.
        """
        self.root = FastLookup.make(root)
        self.at = at

    @property
    def open(self):
        """A callable that opens this member; arguments go to root.open()."""
        return functools.partial(self.root.open, self.at)

    @property
    def name(self):
        """The final path component, without any trailing slash."""
        return posixpath.basename(self.at.rstrip("/"))

    def read_text(self, *args, **kwargs):
        """Return the member decoded as text.

        Positional and keyword arguments are passed to io.TextIOWrapper.
        """
        with self.open() as strm:
            return io.TextIOWrapper(strm, *args, **kwargs).read()

    def read_bytes(self):
        """Return the raw bytes of the member."""
        with self.open() as strm:
            return strm.read()

    def _is_child(self, path):
        # True when *path* sits directly inside this directory.
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        # Build the new object from the runtime class so that subclasses
        # keep their type across joinpath(), parent and iterdir().
        return type(self)(self.root, at)

    def is_dir(self):
        """Return True for the archive top or a name ending in a slash."""
        return not self.at or self.at.endswith("/")

    def is_file(self):
        """Return True if this path names an existing, non-directory member.

        A name that does not exist in the archive is not a file, in line
        with pathlib.Path.is_file().
        """
        return self.exists() and not self.is_dir()

    def exists(self):
        """Return True if this path's name is in the archive's name set."""
        return self.at in self.root._name_set()

    def iterdir(self):
        """Iterate over the direct children of this directory.

        Raises ValueError when this path is not a directory.
        """
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self.root.namelist())
        return filter(self._is_child, subs)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, add):
        """Return the path for *add* joined below this one.

        The result is passed through root.resolve_dir(), so a name that
        is only known as a directory gains its trailing slash.
        """
        next = posixpath.join(self.at, add)
        return self._next(self.root.resolve_dir(next))

    __truediv__ = joinpath

    @property
    def parent(self):
        """The containing directory; the archive top is its own parent."""
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)
2375
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04002376
def main(args=None):
    """Command-line entry point: list, extract, create or test a zipfile.

    *args* is the argument list handed to argparse; None makes argparse
    read the process arguments.  Exactly one of -l, -e, -c, -t is required.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for zipfile module.')
    modes = parser.add_mutually_exclusive_group(required=True)
    modes.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    modes.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    modes.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    modes.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    options = parser.parse_args(args)

    if options.test is not None:
        with ZipFile(options.test, 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")
        return

    if options.list is not None:
        with ZipFile(options.list, 'r') as zf:
            zf.printdir()
        return

    if options.extract is not None:
        archive, target_dir = options.extract
        with ZipFile(archive, 'r') as zf:
            zf.extractall(target_dir)
        return

    if options.create is not None:
        # First value is the archive to create, the rest are the sources.
        zip_name, *sources = options.create

        def add_tree(zf, path, zippath):
            # Files are stored deflated; directories get their own entry
            # (when they have an archive name) and are walked in sorted
            # order; anything else is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
                return
            if not os.path.isdir(path):
                return
            if zippath:
                zf.write(path, zippath)
            for entry in sorted(os.listdir(path)):
                add_tree(zf,
                         os.path.join(path, entry),
                         os.path.join(zippath, entry))

        with ZipFile(zip_name, 'w') as zf:
            for path in sources:
                # A trailing separator leaves an empty basename; fall back
                # to the name of the directory itself.
                zippath = os.path.basename(path)
                if not zippath:
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                add_tree(zf, path, zippath)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002436
Miss Islington (bot)ed4d2632020-02-11 19:21:32 -08002437
# Run the command-line interface when the module is executed as a script.
if __name__ == "__main__":
    main()