blob: 8f8cb863b003430e9071b8306c758ba1200ba3ba [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04006import binascii
7import functools
8import importlib.util
Antoine Pitroua32f9a22010-01-27 21:18:57 +00009import io
Barry Warsaw28a691b2010-04-17 00:19:56 +000010import os
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040011import posixpath
Barry Warsaw28a691b2010-04-17 00:19:56 +000012import shutil
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040013import stat
Barry Warsaw28a691b2010-04-17 00:19:56 +000014import struct
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040015import sys
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020016import threading
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040017import time
Guido van Rossum32abe6f2000-03-31 17:30:02 +000018
19try:
Tim Peterse1190062001-01-15 03:34:38 +000020 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000021 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040022except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000023 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000024 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000025
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020026try:
27 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040028except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020029 bz2 = None
30
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020031try:
32 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040033except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020034 lzma = None
35
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020036__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020037 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Georg Brandl4d540882010-10-28 06:42:33 +000038 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000039
class BadZipFile(Exception):
    """Raised when a file cannot be interpreted as a valid ZIP archive."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +000042
43
class LargeZipFile(Exception):
    """Raised while writing when ZIP64 is needed but has been disabled.

    Signals that the archive being written requires the ZIP64 extensions
    and those extensions are not enabled.
    """
49
Georg Brandl4d540882010-10-28 06:42:33 +000050error = BadZipfile = BadZipFile # Pre-3.2 compatibility names
51
Guido van Rossum32abe6f2000-03-31 17:30:02 +000052
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +000053ZIP64_LIMIT = (1 << 31) - 1
Serhiy Storchakacfbb3942014-09-23 21:34:24 +030054ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +000055ZIP_MAX_COMMENT = (1 << 16) - 1
Thomas Wouters0e3f5912006-08-11 14:57:12 +000056
Guido van Rossum32abe6f2000-03-31 17:30:02 +000057# constants for Zip file compression methods
58ZIP_STORED = 0
59ZIP_DEFLATED = 8
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020060ZIP_BZIP2 = 12
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020061ZIP_LZMA = 14
Guido van Rossum32abe6f2000-03-31 17:30:02 +000062# Other ZIP compression methods not supported
63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020064DEFAULT_VERSION = 20
65ZIP64_VERSION = 45
66BZIP2_VERSION = 46
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020067LZMA_VERSION = 63
Martin v. Löwisd099b562012-05-01 14:08:22 +020068# we recognize (but not necessarily support) all features up to that version
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020069MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020070
Martin v. Löwisb09b8442008-07-03 14:13:42 +000071# Below are some formats and associated data for reading/writing headers using
72# the struct module. The names and structures of headers/records are those used
73# in the PKWARE description of the ZIP file format:
74# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
75# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000076
Martin v. Löwisb09b8442008-07-03 14:13:42 +000077# The "end of central directory" structure, magic number, size, and indices
78# (section V.I in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +000079structEndArchive = b"<4s4H2LH"
80stringEndArchive = b"PK\005\006"
81sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +000082
83_ECD_SIGNATURE = 0
84_ECD_DISK_NUMBER = 1
85_ECD_DISK_START = 2
86_ECD_ENTRIES_THIS_DISK = 3
87_ECD_ENTRIES_TOTAL = 4
88_ECD_SIZE = 5
89_ECD_OFFSET = 6
90_ECD_COMMENT_SIZE = 7
91# These last two indices are not part of the structure as defined in the
92# spec, but they are used internally by this module as a convenience
93_ECD_COMMENT = 8
94_ECD_LOCATION = 9
95
96# The "central directory" structure, magic number, size, and indices
97# of entries in the structure (section V.F in the format document)
98structCentralDir = "<4s4B4HL2L5H2L"
Georg Brandl2ee470f2008-07-16 12:55:28 +000099stringCentralDir = b"PK\001\002"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000100sizeCentralDir = struct.calcsize(structCentralDir)
101
Fred Drake3e038e52001-02-28 17:56:26 +0000102# indexes of entries in the central directory structure
103_CD_SIGNATURE = 0
104_CD_CREATE_VERSION = 1
105_CD_CREATE_SYSTEM = 2
106_CD_EXTRACT_VERSION = 3
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000107_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +0000108_CD_FLAG_BITS = 5
109_CD_COMPRESS_TYPE = 6
110_CD_TIME = 7
111_CD_DATE = 8
112_CD_CRC = 9
113_CD_COMPRESSED_SIZE = 10
114_CD_UNCOMPRESSED_SIZE = 11
115_CD_FILENAME_LENGTH = 12
116_CD_EXTRA_FIELD_LENGTH = 13
117_CD_COMMENT_LENGTH = 14
118_CD_DISK_NUMBER_START = 15
119_CD_INTERNAL_FILE_ATTRIBUTES = 16
120_CD_EXTERNAL_FILE_ATTRIBUTES = 17
121_CD_LOCAL_HEADER_OFFSET = 18
122
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000123# The "local file header" structure, magic number, size, and indices
124# (section V.A in the format document)
125structFileHeader = "<4s2B4HL2L2H"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000126stringFileHeader = b"PK\003\004"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000127sizeFileHeader = struct.calcsize(structFileHeader)
128
Fred Drake3e038e52001-02-28 17:56:26 +0000129_FH_SIGNATURE = 0
130_FH_EXTRACT_VERSION = 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000131_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000132_FH_GENERAL_PURPOSE_FLAG_BITS = 3
133_FH_COMPRESSION_METHOD = 4
134_FH_LAST_MOD_TIME = 5
135_FH_LAST_MOD_DATE = 6
136_FH_CRC = 7
137_FH_COMPRESSED_SIZE = 8
138_FH_UNCOMPRESSED_SIZE = 9
139_FH_FILENAME_LENGTH = 10
140_FH_EXTRA_FIELD_LENGTH = 11
141
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000142# The "Zip64 end of central directory locator" structure, magic number, and size
Georg Brandl2ee470f2008-07-16 12:55:28 +0000143structEndArchive64Locator = "<4sLQL"
144stringEndArchive64Locator = b"PK\x06\x07"
145sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000146
147# The "Zip64 end of central directory" record, magic number, size, and indices
148# (section V.G in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000149structEndArchive64 = "<4sQ2H2L4Q"
150stringEndArchive64 = b"PK\x06\x06"
151sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000152
153_CD64_SIGNATURE = 0
154_CD64_DIRECTORY_RECSIZE = 1
155_CD64_CREATE_VERSION = 2
156_CD64_EXTRACT_VERSION = 3
157_CD64_DISK_NUMBER = 4
158_CD64_DISK_NUMBER_START = 5
159_CD64_NUMBER_ENTRIES_THIS_DISK = 6
160_CD64_NUMBER_ENTRIES_TOTAL = 7
161_CD64_DIRECTORY_SIZE = 8
162_CD64_OFFSET_START_CENTDIR = 9
163
Silas Sewell4ba3b502018-09-18 13:00:05 -0400164_DD_SIGNATURE = 0x08074b50
165
Serhiy Storchaka9bdb7be2018-09-17 15:36:40 +0300166_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
167
168def _strip_extra(extra, xids):
169 # Remove Extra Fields with specified IDs.
170 unpack = _EXTRA_FIELD_STRUCT.unpack
171 modified = False
172 buffer = []
173 start = i = 0
174 while i + 4 <= len(extra):
175 xid, xlen = unpack(extra[i : i + 4])
176 j = i + 4 + xlen
177 if xid in xids:
178 if i != start:
179 buffer.append(extra[start : i])
180 start = j
181 modified = True
182 i = j
183 if not modified:
184 return extra
185 return b''.join(buffer)
186
def _check_zipfile(fp):
    """Return True if an "end of central directory" record is found in *fp*.

    An OSError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        found = bool(_EndRecData(fp))   # a record means the magic number matched
    except OSError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000194
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    *filename* may be a path, or an already opened file or file-like object
    (anything with a ``read`` attribute).  Returns False when the file
    cannot be opened or read.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            # A path was given: open it ourselves and close it afterwards.
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except OSError:
        pass
    return verdict
210
def _EndRecData64(fpin, offset, endrec):
    """Update *endrec* from the ZIP64 end-of-archive records, if present.

    *offset* is the position of the regular "end of central directory"
    record relative to the end of the file (a negative number).  When no
    valid ZIP64 locator/record pair precedes it, *endrec* is returned
    untouched.  Raises BadZipFile for multi-disk archives.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, reloff, disks = struct.unpack(structEndArchive64Locator,
                                                 locator)
    if magic != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    fields = struct.unpack(structEndArchive64, record)
    if fields[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = fields[_CD64_SIGNATURE]
    endrec[_ECD_DISK_NUMBER] = fields[_CD64_DISK_NUMBER]
    endrec[_ECD_DISK_START] = fields[_CD64_DISK_NUMBER_START]
    endrec[_ECD_ENTRIES_THIS_DISK] = fields[_CD64_NUMBER_ENTRIES_THIS_DISK]
    endrec[_ECD_ENTRIES_TOTAL] = fields[_CD64_NUMBER_ENTRIES_TOTAL]
    endrec[_ECD_SIZE] = fields[_CD64_DIRECTORY_SIZE]
    endrec[_ECD_OFFSET] = fields[_CD64_OFFSET_START_CENTDIR]
    return endrec
252
253
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: an archive without a comment ends with the fixed-size
    # "end of central directory" structure.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    has_bare_record = (len(tail) == sizeEndCentDir
                       and tail[0:4] == stringEndArchive
                       and tail[-2:] == b"\000\000")
    if has_bare_record:
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, tail))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    search_from = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(search_from, 0)
    window = fpin.read()
    pos = window.rfind(stringEndArchive)
    if pos < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    raw = window[pos:pos + sizeEndCentDir]
    if len(raw) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, raw))
    comment_len = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    comment_start = pos + sizeEndCentDir
    endrec.append(window[comment_start:comment_start + comment_len])
    endrec.append(search_from + pos)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, search_from + pos - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000313
Fred Drake484d7352000-10-02 21:14:52 +0000314
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename is the member name; it is cut at the first null byte and
        has os.sep replaced by "/".  date_time is a sequence
        (year, month, day, hour, min, sec).

        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Return a short description listing only the non-default details."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # High 16 bits of external_attr hold the mode; low 16 bits the rest.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        zip64 selects whether a ZIP64 extra record is emitted; when None it
        is decided from the sizes.  extract_version and create_version are
        raised on the instance when the header requires a newer version.

        Raises LargeZipFile if a size exceeds ZIP64_LIMIT while zip64 is
        false.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4,
                                        file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        ASCII is used when possible; otherwise the name is encoded as UTF-8
        and flag bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 record (ID 0x0001) of self.extra to this entry.

        file_size, compress_size and header_offset are replaced, in that
        order, by the 64-bit values of the record when they hold the
        "see ZIP64 record" sentinel value.

        Raises BadZipFile when a record is truncated, has an unexpected
        size, or holds fewer values than there are sentinels to replace.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        self.file_size = counts[idx]
                        idx += 1

                    if self.compress_size == 0xFFFFFFFF:
                        self.compress_size = counts[idx]
                        idx += 1

                    if self.header_offset == 0xffffffff:
                        self.header_offset = counts[idx]
                        idx += 1
                except IndexError:
                    # The record is shorter than the number of sentinel
                    # fields it has to supply: the archive is corrupt.
                    raise BadZipFile(
                        "Corrupt extra field %04x (size=%d)" % (tp, ln)
                    ) from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).

        When strict_timestamps is false, modification years outside
        1980..2107 are clamped to the nearest supported timestamp.
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        # endswith() rather than [-1]: an empty name is simply not a
        # directory, it must not raise IndexError.
        return self.filename.endswith('/')
537
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000538
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300539# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
540# internal keys. We noticed that a direct implementation is faster than
541# relying on binascii.crc32().
Thomas Wouterscf297e42007-02-23 15:07:44 +0000542
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300543_crctable = None
544def _gen_crc(crc):
545 for j in range(8):
546 if crc & 1:
547 crc = (crc >> 1) ^ 0xEDB88320
548 else:
549 crc >>= 1
550 return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000551
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300552# ZIP supports a password-based form of encryption. Even though known
553# plaintext attacks have been found against it, it is still useful
554# to be able to get data out of such a file.
555#
556# Usage:
557# zd = _ZipDecrypter(mypwd)
558# plain_bytes = zd(cypher_bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000559
def _ZipDecrypter(pwd):
    """Return a function that decrypts bytes with the ZIP password cipher.

    *pwd* is an iterable of integer byte values (e.g. a bytes object) used
    to initialise the three cipher keys.  The returned callable keeps the
    key state between calls, so consecutive chunks of one stream must be
    passed to it in order.
    """
    # The CRC table is built once, on first use, and shared module-wide.
    global _crctable
    if _crctable is None:
        _crctable = [_gen_crc(n) for n in range(256)]
    table = _crctable

    key0, key1, key2 = 305419896, 591751049, 878082192

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ table[(crc ^ ch) & 0xFF]

    def update_keys(c):
        """Advance the three keys with one plaintext byte."""
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        key1 = ((key1 + (key0 & 0xFF)) * 134775813 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    for byte in pwd:
        update_keys(byte)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        for c in data:
            k = key2 | 2
            c ^= ((k * (k ^ 1)) >> 8) & 0xFF
            update_keys(c)
            plain.append(c)
        return bytes(plain)

    return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000596
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200597
class LZMACompressor:
    """Raw LZMA1 compressor whose output starts with a small header.

    The first bytes produced are two version bytes (9, 4), the 16-bit
    little-endian length of the filter properties, and the properties
    themselves.  The underlying compressor is created lazily, on the first
    call to compress() or flush().
    """

    def __init__(self):
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        header = struct.pack('<BBH', 9, 4, len(props))
        return header + props

    def compress(self, data):
        """Compress *data*, emitting the header first if not yet done."""
        prefix = self._init() if self._comp is None else b''
        return prefix + self._comp.compress(data)

    def flush(self):
        """Finish the stream, emitting the header first if not yet done."""
        prefix = self._init() if self._comp is None else b''
        return prefix + self._comp.flush()
619
620
class LZMADecompressor:
    """Raw LZMA1 decompressor for streams that start with a small header.

    Input is buffered until the 4-byte header (two version bytes and a
    16-bit little-endian properties length) and the properties block have
    been received; only then is the underlying decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Feed *data* and return whatever can be decompressed so far.

        Returns b'' while the header is still incomplete.
        """
        if self._decomp is None:
            pending = self._unconsumed + data
            self._unconsumed = pending
            if len(pending) <= 4:
                return b''
            (psize,) = struct.unpack('<H', pending[2:4])
            body_start = 4 + psize
            if len(pending) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[body_start:]
            # The header buffer is no longer needed once decoding starts.
            del self._unconsumed

        chunk = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return chunk
647
648
649compressor_names = {
650 0: 'store',
651 1: 'shrink',
652 2: 'reduce',
653 3: 'reduce',
654 4: 'reduce',
655 5: 'reduce',
656 6: 'implode',
657 7: 'tokenize',
658 8: 'deflate',
659 9: 'deflate64',
660 10: 'implode',
661 12: 'bzip2',
662 14: 'lzma',
663 18: 'terse',
664 19: 'lz77',
665 97: 'wavpack',
666 98: 'ppmd',
667}
668
def _check_compression(compression):
    """Validate that *compression* is a usable compression method.

    Raises RuntimeError when the method is known but the module that
    implements it could not be imported, and NotImplementedError for any
    other method.  ZIP_STORED is always accepted.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        module = zlib
        message = "Compression requires the (missing) zlib module"
    elif compression == ZIP_BZIP2:
        module = bz2
        message = "Compression requires the (missing) bz2 module"
    elif compression == ZIP_LZMA:
        module = lzma
        message = "Compression requires the (missing) lzma module"
    else:
        raise NotImplementedError("That compression method is not supported")
    if not module:
        raise RuntimeError(message)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200686
687
def _get_compressor(compress_type, compresslevel=None):
    """Return a new compressor object for *compress_type*.

    *compresslevel* is forwarded to zlib/bz2 when given and ignored for
    ZIP_LZMA.  Returns None for any other compression type.
    """
    if compress_type == ZIP_DEFLATED:
        level = (zlib.Z_DEFAULT_COMPRESSION if compresslevel is None
                 else compresslevel)
        # Negative window bits: raw deflate stream without zlib header.
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()
    return None
702
703
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    Returns None for ZIP_STORED.  Raises NotImplementedError, naming the
    method when compressor_names knows it, for unsupported types.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    method_name = compressor_names.get(compress_type)
    if method_name:
        message = "compression type %d (%s)" % (compress_type, method_name)
    else:
        message = "compression type %d" % (compress_type,)
    raise NotImplementedError(message)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200719
720
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200721class _SharedFile:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300722 def __init__(self, file, pos, close, lock, writing):
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200723 self._file = file
724 self._pos = pos
725 self._close = close
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200726 self._lock = lock
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300727 self._writing = writing
John Jolly066df4f2018-01-30 01:51:35 -0700728 self.seekable = file.seekable
729 self.tell = file.tell
730
731 def seek(self, offset, whence=0):
732 with self._lock:
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200733 if self._writing():
John Jolly066df4f2018-01-30 01:51:35 -0700734 raise ValueError("Can't reposition in the ZIP file while "
735 "there is an open writing handle on it. "
736 "Close the writing handle before trying to read.")
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200737 self._file.seek(offset, whence)
John Jolly066df4f2018-01-30 01:51:35 -0700738 self._pos = self._file.tell()
739 return self._pos
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200740
741 def read(self, n=-1):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200742 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300743 if self._writing():
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300744 raise ValueError("Can't read from the ZIP file while there "
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300745 "is an open writing handle on it. "
746 "Close the writing handle before trying to read.")
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200747 self._file.seek(self._pos)
748 data = self._file.read(n)
749 self._pos = self._file.tell()
750 return data
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200751
752 def close(self):
753 if self._file is not None:
754 fileobj = self._file
755 self._file = None
756 self._close(fileobj)
757
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200758# Provide the tell method for unseekable stream
759class _Tellable:
760 def __init__(self, fp):
761 self.fp = fp
762 self.offset = 0
763
764 def write(self, data):
765 n = self.fp.write(data)
766 self.offset += n
767 return n
768
769 def tell(self):
770 return self.offset
771
772 def flush(self):
773 self.fp.flush()
774
775 def close(self):
776 self.fp.close()
777
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200778
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # Parenthesised on purpose: "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Wrap *fileobj*, positioned at the start of the member's data.

        fileobj:       object to read the (possibly compressed) bytes from.
        mode:          stored as the ``mode`` attribute.
        zipinfo:       provides compress_type, compress_size, file_size,
                       filename and optionally CRC.
        decrypter:     optional callable applied to every chunk read.
        close_fileobj: whether close() also closes *fileobj*.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        # Always initialise the running CRC: the seek support below reads
        # it, and leaving it unset used to raise an AttributeError that was
        # silently swallowed, disabling seeking for members without a CRC.
        self._running_crc = crc32(b'')
        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
        else:
            self._expected_crc = None

        self._seekable = False
        try:
            if fileobj.seekable():
                # Remember the initial state so seek() can rewind.
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            # fileobj has no seekable()/tell(): treat it as unseekable.
            pass

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Push the bytes just read back in front of the buffer.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than requested: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n=-1):
        """Read up to n bytes with at most one read() system call.

        If n is omitted, None, or negative, return the buffered bytes plus
        the first non-empty chunk obtained from the underlying stream.
        """

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read between MIN_READ_SIZE and n raw bytes (capped at what is left
        # of the member) from the file object and decrypt them if needed.
        # Raises EOFError if the file object returns no data.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        """Close this object and, if requested at construction, the file object."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        return self._seekable

    def seek(self, offset, whence=0):
        """Change the position within the uncompressed data.

        The target is clamped to [0, file size].  Seeking backwards outside
        the current buffer restarts decompression from the beginning of the
        member; seeking forwards reads and discards data.  Returns the new
        position.  Raises io.UnsupportedOperation if the underlying stream
        is not seekable and ValueError for an invalid *whence*.
        """
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            # NOTE(review): __init__ subtracts 12 from _compress_left for
            # encrypted members, but this path restores the unadjusted
            # compress_size and does not touch self._decrypter; if the
            # decrypter is stateful, rewinding an encrypted member would
            # misbehave -- confirm against the caller that builds it.
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos

        # Skip forward by reading and discarding in bounded chunks.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        """Return the current position within the uncompressed data."""
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos
1075
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001076
class _ZipWriteFile(io.BufferedIOBase):
    """Writable stream for a single new archive member.

    Data passed to write() is checksummed, optionally compressed and written
    to the owning ZipFile's file object.  close() flushes the compressor,
    stores the final CRC and sizes (either in a trailing data descriptor or
    by rewriting the local header) and registers the member with the ZipFile.
    """

    def __init__(self, zf, zinfo, zip64):
        self._zipfile = zf
        self._zinfo = zinfo
        self._zip64 = zip64
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._crc = 0
        self._file_size = 0
        self._compress_size = 0

    @property
    def _fileobj(self):
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Write *data* to the member and return the number of bytes consumed."""
        if self.closed:
            raise ValueError('I/O operation on closed file.')
        nbytes = len(data)
        self._file_size += nbytes
        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def _finish_data(self):
        # Flush any data from the compressor, and update header info
        zinfo = self._zinfo
        if self._compressor:
            tail = self._compressor.flush()
            self._compress_size += len(tail)
            self._fileobj.write(tail)
            zinfo.compress_size = self._compress_size
        else:
            zinfo.compress_size = self._file_size
        zinfo.CRC = self._crc
        zinfo.file_size = self._file_size

    def _write_header_info(self):
        # Write updated header info
        zinfo = self._zinfo
        fp = self._fileobj
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            fmt = '<LLQQ' if self._zip64 else '<LLLL'
            fp.write(struct.pack(fmt, _DD_SIGNATURE, zinfo.CRC,
                                 zinfo.compress_size, zinfo.file_size))
            self._zipfile.start_dir = fp.tell()
            return

        if not self._zip64:
            if self._file_size > ZIP64_LIMIT:
                raise RuntimeError(
                    'File size unexpectedly exceeded ZIP64 limit')
            if self._compress_size > ZIP64_LIMIT:
                raise RuntimeError(
                    'Compressed size unexpectedly exceeded ZIP64 limit')
        # Seek backwards and write file header (which will now include
        # correct CRC and file sizes)

        # Preserve current position in file
        self._zipfile.start_dir = fp.tell()
        fp.seek(zinfo.header_offset)
        fp.write(zinfo.FileHeader(self._zip64))
        fp.seek(self._zipfile.start_dir)

    def close(self):
        """Finish the member; the ZipFile's writing flag is always cleared."""
        if self.closed:
            return
        try:
            super().close()
            self._finish_data()
            self._write_header_info()

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            self._zipfile._writing = False
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001152
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001153
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001154
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001155class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001156 """ Class with methods to open, read, write, close, list zip files.
1157
Bo Baylesce237c72018-01-29 23:54:07 -06001158 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1159 compresslevel=None)
Tim Petersa19a1682001-03-29 04:36:09 +00001160
Fred Drake3d9091e2001-03-26 15:49:24 +00001161 file: Either the path to the file, or a file-like object.
1162 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001163 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1164 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001165 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1166 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001167 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1168 needed, otherwise it will raise an exception when this would
1169 be necessary.
Bo Baylesce237c72018-01-29 23:54:07 -06001170 compresslevel: None (default for the given compression type) or an integer
1171 specifying the level to pass to the compressor.
1172 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1173 When using ZIP_DEFLATED integers 0 through 9 are accepted.
1174 When using ZIP_BZIP2 integers 1 through 9 are accepted.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001175
Fred Drake3d9091e2001-03-26 15:49:24 +00001176 """
Fred Drake484d7352000-10-02 21:14:52 +00001177
Fred Drake90eac282001-02-28 05:29:34 +00001178 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001179 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001180
    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None, *, strict_timestamps=True):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'.

        Raises ValueError for any other mode, and whatever
        _check_compression() raises for an unusable *compression*.  If
        setting up the archive fails after the file was obtained, the file
        is released through _fpclose() and the exception is re-raised.
        """
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''
        self._strict_timestamps = strict_timestamps

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            # The first four entries map the ZipFile mode to the preferred
            # open() mode; the remaining entries give the fallback to try
            # when opening with the preferred mode raises OSError.
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    if filemode in modeDict:
                        # Retry with the fallback mode.
                        filemode = modeDict[filemode]
                        continue
                    # No fallback left: propagate the error.
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    # No usable tell(): wrap the stream so that written
                    # bytes are counted instead.
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
            else:
                # Not reachable after the mode check at the top; kept as a
                # safety net.
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            # Deliberately bare: release the file on any failure, then
            # re-raise the original exception unchanged.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001270
    def __enter__(self):
        """Enter a ``with`` block; the ZipFile itself is the managed object."""
        return self
1273
    def __exit__(self, type, value, traceback):
        """Leave a ``with`` block by calling close().

        Returns None, so an exception raised inside the block propagates.
        """
        self.close()
1276
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001277 def __repr__(self):
1278 result = ['<%s.%s' % (self.__class__.__module__,
1279 self.__class__.__qualname__)]
1280 if self.fp is not None:
1281 if self._filePassed:
1282 result.append(' file=%r' % self.fp)
1283 elif self.filename is not None:
1284 result.append(' filename=%r' % self.filename)
1285 result.append(' mode=%r' % self.mode)
1286 else:
1287 result.append(' [closed]')
1288 result.append('>')
1289 return ''.join(result)
1290
    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Locates the end-of-central-directory record, reads the central
        directory and appends one ZipInfo per entry to self.filelist and
        self.NameToInfo.  Also sets self._comment and self.start_dir.

        Raises BadZipFile when no end record is found or the central
        directory is truncated or has a bad signature, and
        NotImplementedError when an entry needs a newer extract version
        than MAX_EXTRACT_VERSION.
        """
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset", offset_cd, inferred, concat)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        # Parse the directory from memory; from here on "fp" is the buffer,
        # not the archive file.
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Index 5 is the same field unpacked into x.flag_bits below.
            flags = centdir[5]
            if flags & 0x800:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            # d: year-1980 in bits 9 and up, month in bits 5-8, day in bits 0-4
            # t: hour in bits 11 and up, minute in bits 5-10, seconds/2 in
            #    bits 0-4
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            # Shift the header offset by the size of any data that precedes
            # the archive in the file.
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            if self.debug > 2:
                print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001367
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001368
def namelist(self):
    """Return the names of all archive members as a new list.

    Names are taken from the ``filename`` attribute of each entry in
    ``self.filelist`` and appear in the same order as that list.
    """
    names = []
    for entry in self.filelist:
        names.append(entry.filename)
    return names
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001372
def infolist(self):
    """Return the list of member info objects held by the archive.

    The returned object is ``self.filelist`` itself, not a copy.
    """
    entries = self.filelist
    return entries
1377
def printdir(self, file=None):
    """Print a table of contents (name, modification time, size).

    One header line is printed, followed by one line per entry of
    ``self.filelist``.  ``file`` is forwarded to ``print``; ``None``
    therefore means standard output.
    """
    # NOTE(review): this block was recovered from a whitespace-collapsed
    # rendering; the "Modified " header literal may originally have carried
    # extra padding spaces -- confirm against the canonical source.
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for zinfo in self.filelist:
        # date_time is formatted as YYYY-MM-DD HH:MM:SS from its first six items.
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001386
1387 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001388 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001389 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001390 for zinfo in self.filelist:
1391 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001392 # Read by chunks, to avoid an OverflowError or a
1393 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001394 with self.open(zinfo.filename, "r") as f:
1395 while f.read(chunk_size): # Check CRC-32
1396 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001397 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001398 return zinfo.filename
1399
def getinfo(self, name):
    """Return the info object registered under 'name'.

    Raises KeyError when ``self.NameToInfo`` has no entry for 'name'
    (or maps it to None).
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001408
def setpassword(self, pwd):
    """Set the default password used for encrypted members.

    A falsy 'pwd' (None, b"", ...) clears the default by storing None.
    Any other value must be a bytes instance, otherwise TypeError is raised.
    """
    if not pwd:
        self.pwd = None
        return
    if not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd
Thomas Wouterscf297e42007-02-23 15:07:44 +00001417
@property
def comment(self):
    """The comment text associated with the ZIP file (stored in ``_comment``)."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Only bytes are accepted; anything else is a caller error.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
    # Over-long comments are truncated to ZIP_MAX_COMMENT bytes with a
    # warning instead of being rejected.
    too_long = len(comment) > ZIP_MAX_COMMENT
    if too_long:
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT, stacklevel=2)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    # Record that the archive has changed.
    self._didModify = True
1435
def read(self, name, pwd=None):
    """Return the complete contents of member 'name' as read via open().

    'name' and 'pwd' are passed straight to ``self.open`` in mode "r".
    """
    with self.open(name, "r", pwd) as member:
        payload = member.read()
    return payload
Guido van Rossumd8faa362007-04-27 19:54:29 +00001440
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Return a file-like object for the member 'name'.

    name is either the member's file name (a string) or a ZipInfo object.

    mode must be 'r' (read an existing member) or 'w' (write a new
    member); anything else raises ValueError.

    pwd is the password, as bytes, used to decrypt the member.  It is
    only accepted when reading.

    force_zip64 is forwarded to _open_to_write() when writing.  Pass it
    when the size is not known in advance but may exceed 2 GiB; if the
    size is known, prefer passing a ZipInfo with file_size set.
    """
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    if pwd and (mode == "w"):
        raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    for_writing = mode == 'w'

    # Resolve 'name' to an info object.
    if isinstance(name, ZipInfo):
        zinfo = name
    elif for_writing:
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
    else:
        zinfo = self.getinfo(name)

    if for_writing:
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                         "is an open writing handle on it. "
                         "Close the writing handle before trying to read.")

    # Reading: take a reference on the underlying file; it is released
    # through self._fpclose when the shared wrapper is closed.
    self._fileRefCnt += 1
    shared = _SharedFile(self.fp, zinfo.header_offset,
                         self._fpclose, self._lock, lambda: self._writing)
    try:
        # Consume and validate the fixed-size local file header.
        raw_header = shared.read(sizeFileHeader)
        if len(raw_header) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        header_fields = struct.unpack(structFileHeader, raw_header)
        if header_fields[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        raw_name = shared.read(header_fields[_FH_FILENAME_LENGTH])
        extra_length = header_fields[_FH_EXTRA_FIELD_LENGTH]
        if extra_length:
            # The extra field is skipped, not interpreted.
            shared.read(extra_length)

        flags = zinfo.flag_bits
        if flags & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")
        if flags & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        # Flag bit 11 marks a UTF-8 file name; otherwise cp437 is used.
        name_encoding = "utf-8" if flags & 0x800 else "cp437"
        header_name = raw_name.decode(name_encoding)
        if header_name != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, raw_name))

        # Encrypted member (flag bit 0): set up the decrypter and verify
        # the password.
        decrypter = None
        if flags & 0x1:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)

            decrypter = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption
            # header used to strengthen the algorithm.  The first 11 bytes
            # are completely random, while the 12th contains the MSB of
            # the CRC, or the MSB of the file time depending on the header
            # type, and is used to check the correctness of the password.
            crypt_header = shared.read(12)
            plain_header = decrypter(crypt_header[0:12])
            if flags & 0x8:
                # compare against the file time from extended local headers
                expected = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                expected = (zinfo.CRC >> 24) & 0xff
            if plain_header[11] != expected:
                raise RuntimeError("Bad password for file %r" % name)

        return ZipExtFile(shared, mode, zinfo, decrypter, True)
    except:
        # Release the shared handle on any failure, then propagate.
        shared.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001554
def _open_to_write(self, zinfo, force_zip64=False):
    """Write the local header for 'zinfo' and return a writer for its data.

    Raises ValueError if force_zip64 is requested while ZIP64 is not
    allowed for this archive, or if another write handle is already open.
    On success the archive is flagged as modified and as having an open
    writing handle.
    """
    if force_zip64 and not self._allowZip64:
        raise ValueError(
            "force_zip64 is True, but allowZip64 was False when opening "
            "the ZIP file."
        )
    if self._writing:
        raise ValueError("Can't write to the ZIP file while there is "
                         "another write handle open on it. "
                         "Close the first handle before opening another.")

    # Sizes and CRC are overwritten with correct data after processing
    # the file; a caller-supplied file_size is kept as a hint.
    if not hasattr(zinfo, 'file_size'):
        zinfo.file_size = 0
    zinfo.compress_size = 0
    zinfo.CRC = 0

    flags = 0x00
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        flags |= 0x02
    if not self._seekable:
        flags |= 0x08
    zinfo.flag_bits = flags

    if not zinfo.external_attr:
        zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

    # Compressed size can be larger than uncompressed size, hence the
    # 5% margin when deciding whether ZIP64 is needed.
    zip64 = self._allowZip64 and \
        (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

    # New member data starts at self.start_dir when the file is seekable.
    if self._seekable:
        self.fp.seek(self.start_dir)
    zinfo.header_offset = self.fp.tell()

    self._writecheck(zinfo)
    self._didModify = True

    self.fp.write(zinfo.FileHeader(zip64))

    self._writing = True
    return _ZipWriteFile(self, zinfo, zip64)
1597
def extract(self, member, path=None, pwd=None):
    """Extract one member and return what _extract_member() returns.

    'member' may be a file name or a ZipInfo object.  'path' is the
    directory to extract into; when it is None the current working
    directory is used, otherwise it is converted with os.fspath().
    'pwd' is passed through unchanged.
    """
    destination = os.getcwd() if path is None else os.fspath(path)
    return self._extract_member(member, destination, pwd)
1610
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members into one directory.

    'path' is the destination directory; None selects the current
    working directory, any other value is converted with os.fspath().
    'members' is optional and must be a subset of the list returned by
    namelist(); when None, everything from namelist() is extracted.
    'pwd' is passed to each extraction.
    """
    selected = self.namelist() if members is None else members
    destination = os.getcwd() if path is None else os.fspath(path)

    for item in selected:
        self._extract_member(item, destination, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001627
@classmethod
def _sanitize_windows_name(cls, arcname, pathsep):
    """Replace bad characters and remove trailing dots from parts.

    Each of the characters ``:<>|"?*`` becomes an underscore.  The name
    is then split on 'pathsep', trailing dots are stripped from every
    part, empty parts are dropped, and the rest is rejoined.  The
    translation table is built on first use and cached on the class.
    """
    trans = cls._windows_illegal_name_trans_table
    if not trans:
        bad_chars = ':<>|"?*'
        trans = str.maketrans(bad_chars, '_' * len(bad_chars))
        cls._windows_illegal_name_trans_table = trans
    cleaned = arcname.translate(trans)
    parts = [part.rstrip('.') for part in cleaned.split(pathsep)]
    return pathsep.join(part for part in parts if part)
1642
def _extract_member(self, member, targetpath, pwd):
    """Extract 'member' below the directory 'targetpath'.

    'member' is a ZipInfo object or a name that is looked up with
    getinfo().  'pwd' is passed to open() when reading the member's data.
    Returns the normalized path of the file or directory that was
    written.

    Directory creation tolerates the directory being created concurrently
    (for example by another thread extracting a sibling member) instead
    of failing with FileExistsError.
    """
    if not isinstance(member, ZipInfo):
        member = self.getinfo(member)

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    invalid_path_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                               if x not in invalid_path_parts)
    if os.path.sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, os.path.sep)

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.  exist_ok=True closes the
    # window between the exists() check and the creation.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs, exist_ok=True)

    if member.is_dir():
        if not os.path.isdir(targetpath):
            try:
                os.mkdir(targetpath)
            except FileExistsError:
                # Lost a race with a concurrent creator: fine as long as
                # what now exists really is a directory.
                if not os.path.isdir(targetpath):
                    raise
        return targetpath

    with self.open(member, pwd=pwd) as source, \
            open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1684
def _writecheck(self, zinfo):
    """Check for errors before writing 'zinfo' to the archive.

    Warns about a name that is already in the archive.  Raises
    ValueError when the archive mode does not allow writing or the
    archive is closed, and LargeZipFile when ZIP64 is not allowed but
    the entry would need it.
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ('w', 'x', 'a'):
        raise ValueError("write() requires mode 'w', 'x', or 'a'")
    if not self.fp:
        raise ValueError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)

    if self._allowZip64:
        # No classic-format limits to enforce.
        return

    # Without ZIP64, find the first limit (if any) this entry would exceed.
    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        reason = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        reason = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        reason = "Zipfile size"
    else:
        reason = None
    if reason:
        raise LargeZipFile(reason +
                           " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001707
def write(self, filename, arcname=None,
          compress_type=None, compresslevel=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    compress_type and compresslevel, when not None, override the
    archive-wide ``self.compression`` / ``self.compresslevel`` for a
    regular file.  For a directory entry only a header is written.

    Raises ValueError if the archive is closed or a writing handle is
    still open.
    """
    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname,
                              strict_timestamps=self._strict_timestamps)

    if not zinfo.is_dir():
        # Regular file: choose compression settings, then stream the data
        # through a writing handle.
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        if compresslevel is None:
            zinfo._compresslevel = self.compresslevel
        else:
            zinfo._compresslevel = compresslevel

        with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
            shutil.copyfileobj(src, dest, 1024*8)
        return

    # Directory entry: no data, only a local header.
    zinfo.compress_size = 0
    zinfo.CRC = 0
    with self._lock:
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        # Record where the file position ended up after the header.
        self.start_dir = self.fp.tell()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001756
def writestr(self, zinfo_or_arcname, data,
             compress_type=None, compresslevel=None):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.

    'zinfo_or_arcname' is either a ZipInfo instance or the name of the
    file in the archive.  When a name is given, a ZipInfo is created
    with the current local time and the archive's default compression
    settings; a name ending in '/' gets directory attributes.

    compress_type and compresslevel, when not None, override the values
    on the ZipInfo (including one passed in by the caller).

    Raises ValueError if the archive is closed or a writing handle is
    still open.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
        # endswith() rather than filename[-1]: indexing raises IndexError
        # when the name is empty.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16  # drwxrwxr-x
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16  # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    if compresslevel is not None:
        zinfo._compresslevel = compresslevel

    zinfo.file_size = len(data)  # Uncompressed size
    with self._lock:
        with self.open(zinfo, mode='w') as dest:
            dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001797
def __del__(self):
    """Finalizer: call close() in case the user forgot to."""
    # Delegates entirely to close().
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001801
def close(self):
    """Close the file, and for mode 'w', 'x' and 'a' write the ending
    records.

    Does nothing when ``self.fp`` is already None.  Raises ValueError
    while a writing handle is still open.  The underlying file is always
    released through ``_fpclose``, even if writing the ending records
    fails.
    """
    if self.fp is None:
        return

    if self._writing:
        raise ValueError("Can't close the ZIP file while there is "
                         "an open writing handle on it. "
                         "Close the writing handle before closing the zip.")

    try:
        # Ending records are written only for a modified, writable archive.
        writable = self.mode in ('w', 'x', 'a')
        if writable and self._didModify:
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        handle = self.fp
        self.fp = None
        self._fpclose(handle)
1823
def _write_end_record(self):
    """Write the central directory and the end-of-archive records.

    One central directory header is written to ``self.fp`` for every
    entry of ``self.filelist``, followed by the ZIP64 end records when a
    limit is exceeded, the classic end-of-archive record and the archive
    comment.  Raises LargeZipFile when ZIP64 records would be required
    but ZIP64 is not allowed.
    """
    for zinfo in self.filelist:  # write central directory
        # Pack the date_time tuple into DOS date and time words.
        stamp = zinfo.date_time
        dosdate = ((stamp[0] - 1980) << 9) | (stamp[1] << 5) | stamp[2]
        dostime = (stamp[3] << 11) | (stamp[4] << 5) | (stamp[5] // 2)

        # Values too large for the 32-bit header fields are moved into a
        # ZIP64 extra field and replaced by 0xffffffff in the header.
        zip64_values = []
        if zinfo.file_size > ZIP64_LIMIT \
                or zinfo.compress_size > ZIP64_LIMIT:
            zip64_values.append(zinfo.file_size)
            zip64_values.append(zinfo.compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
        else:
            file_size = zinfo.file_size
            compress_size = zinfo.compress_size

        if zinfo.header_offset > ZIP64_LIMIT:
            zip64_values.append(zinfo.header_offset)
            header_offset = 0xffffffff
        else:
            header_offset = zinfo.header_offset

        extra_data = zinfo.extra
        min_version = 0
        if zip64_values:
            # Put a fresh ZIP64 field (id 1) in front of the extra data,
            # after stripping any existing field with that id.
            stripped = _strip_extra(extra_data, (1,))
            count = len(zip64_values)
            zip64_field = struct.pack(
                '<HH' + 'Q'*count,
                1, 8*count, *zip64_values)
            extra_data = zip64_field + stripped

            min_version = ZIP64_VERSION

        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version,
                                  zinfo.reserved, flag_bits,
                                  zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data),
                                  len(zinfo.comment),
                                  0, zinfo.internal_attr,
                                  zinfo.external_attr,
                                  header_offset)
        except DeprecationWarning:
            # Dump the values being packed to stderr, then re-raise.
            print((structCentralDir, stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset), file=sys.stderr)
            raise
        self.fp.write(centdir)
        self.fp.write(filename)
        self.fp.write(extra_data)
        self.fp.write(zinfo.comment)

    dir_end = self.fp.tell()
    # Write end-of-zip-archive record
    entry_count = len(self.filelist)
    dir_size = dir_end - self.start_dir
    dir_offset = self.start_dir

    if entry_count > ZIP_FILECOUNT_LIMIT:
        zip64_reason = "Files count"
    elif dir_offset > ZIP64_LIMIT:
        zip64_reason = "Central directory offset"
    elif dir_size > ZIP64_LIMIT:
        zip64_reason = "Central directory size"
    else:
        zip64_reason = None

    if zip64_reason:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(zip64_reason +
                               " would require ZIP64 extensions")
        zip64_end = struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, entry_count, entry_count,
            dir_size, dir_offset)
        self.fp.write(zip64_end)

        zip64_locator = struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, dir_end, 1)
        self.fp.write(zip64_locator)
        # Clamp the values for the classic end record.
        entry_count = min(entry_count, 0xFFFF)
        dir_size = min(dir_size, 0xFFFFFFFF)
        dir_offset = min(dir_offset, 0xFFFFFFFF)

    end_record = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, entry_count, entry_count,
                             dir_size, dir_offset, len(self._comment))
    self.fp.write(end_record)
    self.fp.write(self._comment)
    self.fp.flush()
1925
def _fpclose(self, fp):
    """Drop one reference to the underlying file and maybe close it.

    'fp' is closed only when the reference count reaches zero and
    ``self._filePassed`` is false.
    """
    assert self._fileRefCnt > 0
    remaining = self._fileRefCnt - 1
    self._fileRefCnt = remaining
    if remaining or self._filePassed:
        return
    fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001931
1932
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember the optimize level.

        optimize=-1 selects the legacy lookup in _get_codename() (reuse any
        up-to-date bytecode file); 0, 1 or 2 request that specific level.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add the Python code found at "pathname" to the archive.

        * A package directory (one containing __init__.py) is added
          together with its *.py modules and, recursively, its
          sub-packages, below "basename/<package name>".
        * Any other directory contributes only the *.py files it directly
          contains, stored below "basename".
        * Anything else must be a single *.py file.

        Modules are stored as module.pyc, being compiled first when
        necessary; if compilation fails the source file is stored instead.

        If filterfunc is given, it is called with pathname and with every
        *.py file found in a directory (a package's own __init__.py
        excepted); a false result skips that file or directory.

        Raises RuntimeError when pathname is a file not ending in ".py".
        """
        pathname = os.fspath(pathname)
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s %r skipped by filterfunc' % (label, pathname))
            return

        def add_module(path, arc_prefix):
            # Filter, compile and store one *.py file found in a directory.
            if filterfunc and not filterfunc(path):
                if self.debug:
                    print('file %r skipped by filterfunc' % path)
                return
            fname, arcname = self._get_codename(path[0:-3], arc_prefix)
            if self.debug:
                print("Adding", arcname)
            self.write(fname, arcname)

        if not os.path.isdir(pathname):
            # A single module given by file name.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        package_name = os.path.split(pathname)[1]
        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in sorted(os.listdir(pathname)):
                if os.path.splitext(filename)[1] == ".py":
                    add_module(os.path.join(pathname, filename), basename)
            return

        # This is a package directory, add it
        if basename:
            basename = "%s/%s" % (basename, package_name)
        else:
            basename = package_name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)

        # Add all *.py files and package subdirectories; __init__.py has
        # already been written above.
        entries = sorted(os.listdir(pathname))
        entries.remove("__init__.py")
        for filename in entries:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # Sub-package: recurse (the filter is applied there).
                    self.writepy(path, basename, filterfunc=filterfunc)
            elif os.path.splitext(filename)[1] == ".py":
                add_module(path, basename)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path (no extension), return the file to read
        and the name to store it under, compiling if necessary.  For
        example, given /python/lib/string, return
        (/python/lib/string.pyc, string.pyc).  When compilation fails the
        .py source is returned for both instead.

        Raises ValueError if the optimize level given to the constructor
        is not one of -1, 0, 1 or 2.
        """
        def _compile(file, optimize=-1):
            # Byte-compile `file`; report (rather than raise) failures.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            else:
                return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)

        def up_to_date(candidate):
            # True if `candidate` exists and is not older than the source.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present.  Whichever
            # bytecode file is picked, it is stored under the legacy
            # module.pyc name.
            for candidate in (file_pyc, pycache_opt0,
                              pycache_opt1, pycache_opt2):
                if up_to_date(candidate):
                    fname = candidate
                    arcname = file_pyc
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    if sys.flags.optimize == 0:
                        fname = pycache_opt0
                    elif sys.flags.optimize == 1:
                        fname = pycache_opt1
                    else:
                        fname = pycache_opt2
                    arcname = file_pyc
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            for level, candidate in ((0, pycache_opt0),
                                     (1, pycache_opt1),
                                     (2, pycache_opt2)):
                if self._optimize == level:
                    fname = candidate
                    break
            else:
                msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                raise ValueError(msg)
            arcname = file_pyc
            if not up_to_date(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py

        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002105
2106
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """Wrap `root` (a ZipFile, or anything ZipFile() accepts).

        `at` is the member name this object points to; directories end
        with "/" and the empty string denotes the archive root.
        """
        self.root = root if isinstance(root, ZipFile) else ZipFile(root)
        self.at = at

    @property
    def open(self):
        """The archive's open() with this member's name already bound."""
        return functools.partial(self.root.open, self.at)

    @property
    def name(self):
        """Final path component, without any trailing slash."""
        return posixpath.basename(self.at.rstrip("/"))

    def read_text(self, *args, **kwargs):
        """Return the member's content decoded as text.

        Positional and keyword arguments are forwarded to
        io.TextIOWrapper (e.g. encoding).
        """
        with self.open() as strm:
            return io.TextIOWrapper(strm, *args, **kwargs).read()

    def read_bytes(self):
        """Return the member's raw content."""
        with self.open() as strm:
            return strm.read()

    def _is_child(self, path):
        # True when `path` sits directly inside this directory.
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        # Build a sibling object over the same archive.
        return Path(self.root, at)

    def is_dir(self):
        """True for the archive root and for names ending in "/"."""
        return not self.at or self.at.endswith("/")

    def is_file(self):
        """True whenever is_dir() is false."""
        return not self.is_dir()

    def exists(self):
        """True if this name is a listed member or an implied directory."""
        return self.at in self._names()

    def iterdir(self):
        """Iterate over the direct children of this directory.

        Raises ValueError when called on a file.
        """
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self._names())
        return filter(self._is_child, subs)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, add):
        """Return the path for `add` below this one.

        If only the directory form ("name/") is known to the archive,
        that form is returned so the result reports is_dir() correctly.
        """
        next = posixpath.join(self.at, add)
        next_dir = posixpath.join(self.at, add, "")
        names = self._names()
        return self._next(next_dir if next not in names and next_dir in names else next)

    __truediv__ = joinpath

    @staticmethod
    def _add_implied_dirs(names):
        """Return `names` followed by every directory they imply.

        Archives frequently omit explicit directory entries.  Each
        ancestor directory of each name is appended (with a trailing
        "/") exactly once, unless it is already listed in `names`.
        """
        known = set(names)
        implied = []
        for name in names:
            ancestor = posixpath.dirname(name.rstrip("/"))
            # strip("/") is empty for "" and for all-slash heads such as
            # "/", which dirname() would otherwise return forever.
            while ancestor.strip("/"):
                as_dir = ancestor + "/"
                if as_dir in known:
                    # Already listed or already implied; in both cases its
                    # own ancestors are handled when that entry is visited.
                    break
                known.add(as_dir)
                implied.append(as_dir)
                ancestor = posixpath.dirname(ancestor)
        return names + implied

    @property
    def parent(self):
        """The containing directory (the root is its own parent)."""
        # Strip the trailing slash first: dirname("b/") is "b", which
        # would make a directory its own parent.
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)

    def _names(self):
        # All member names plus the directories they imply.
        return self._add_implied_dirs(self.root.namelist())
2246
2247
def main(args=None):
    """Command-line interface: list, extract, create or test a zip file.

    `args` is the argument list handed to argparse (None means
    sys.argv[1:]).  Exactly one of -l, -e, -c or -t must be supplied.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for zipfile module.')
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument('-l', '--list', metavar='<zipfile>',
                         help='Show listing of a zipfile')
    actions.add_argument('-e', '--extract', nargs=2,
                         metavar=('<zipfile>', '<output_dir>'),
                         help='Extract zipfile into target dir')
    actions.add_argument('-c', '--create', nargs='+',
                         metavar=('<name>', '<file>'),
                         help='Create zipfile from sources')
    actions.add_argument('-t', '--test', metavar='<zipfile>',
                         help='Test if a zipfile is valid')
    args = parser.parse_args(args)

    if args.test is not None:
        with ZipFile(args.test, 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")
        return

    if args.list is not None:
        with ZipFile(args.list, 'r') as zf:
            zf.printdir()
        return

    if args.extract is not None:
        src, curdir = args.extract
        with ZipFile(src, 'r') as zf:
            zf.extractall(curdir)
        return

    if args.create is not None:
        # First value is the archive to create, the rest are its sources.
        zip_name, *files = args.create

        def add_tree(zf, path, zippath):
            # Store a file deflated; store a directory entry (when it has
            # a name) and recurse; silently ignore anything else.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
                return
            if not os.path.isdir(path):
                return
            if zippath:
                zf.write(path, zippath)
            for nm in sorted(os.listdir(path)):
                add_tree(zf,
                         os.path.join(path, nm), os.path.join(zippath, nm))

        with ZipFile(zip_name, 'w') as zf:
            for path in files:
                # A trailing separator leaves an empty basename; fall back
                # to the name of the directory itself.
                zippath = (os.path.basename(path) or
                           os.path.basename(os.path.dirname(path)))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                add_tree(zf, path, zippath)


if __name__ == "__main__":
    main()