blob: b90b60f72e2bcd984141063e3697cefaf7fa5394 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Brett Cannonb57a0852013-06-15 17:32:30 -04008import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import sys
10import time
11import stat
12import shutil
13import struct
14import binascii
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020015import threading
Guido van Rossum32abe6f2000-03-31 17:30:02 +000016
17try:
Tim Peterse1190062001-01-15 03:34:38 +000018 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000019 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040020except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000021 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000022 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000023
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020024try:
25 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040026except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020027 bz2 = None
28
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020029try:
30 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040031except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020032 lzma = None
33
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020034__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020035 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Georg Brandl4d540882010-10-28 06:42:33 +000036 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000037
class BadZipFile(Exception):
    """Error raised by this module for malformed or unsupported ZIP data."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +000040
41
class LargeZipFile(Exception):
    """
    Error signalling that the data being written needs the ZIP64
    extensions while those extensions are not enabled.
    """
47
Georg Brandl4d540882010-10-28 06:42:33 +000048error = BadZipfile = BadZipFile # Pre-3.2 compatibility names
49
Guido van Rossum32abe6f2000-03-31 17:30:02 +000050
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +000051ZIP64_LIMIT = (1 << 31) - 1
Serhiy Storchakacfbb3942014-09-23 21:34:24 +030052ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +000053ZIP_MAX_COMMENT = (1 << 16) - 1
Thomas Wouters0e3f5912006-08-11 14:57:12 +000054
Guido van Rossum32abe6f2000-03-31 17:30:02 +000055# constants for Zip file compression methods
56ZIP_STORED = 0
57ZIP_DEFLATED = 8
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020058ZIP_BZIP2 = 12
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020059ZIP_LZMA = 14
Guido van Rossum32abe6f2000-03-31 17:30:02 +000060# Other ZIP compression methods not supported
61
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020062DEFAULT_VERSION = 20
63ZIP64_VERSION = 45
64BZIP2_VERSION = 46
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020065LZMA_VERSION = 63
Martin v. Löwisd099b562012-05-01 14:08:22 +020066# we recognize (but not necessarily support) all features up to that version
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020067MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020068
Martin v. Löwisb09b8442008-07-03 14:13:42 +000069# Below are some formats and associated data for reading/writing headers using
70# the struct module. The names and structures of headers/records are those used
71# in the PKWARE description of the ZIP file format:
72# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
73# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000074
Martin v. Löwisb09b8442008-07-03 14:13:42 +000075# The "end of central directory" structure, magic number, size, and indices
76# (section V.I in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +000077structEndArchive = b"<4s4H2LH"
78stringEndArchive = b"PK\005\006"
79sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +000080
81_ECD_SIGNATURE = 0
82_ECD_DISK_NUMBER = 1
83_ECD_DISK_START = 2
84_ECD_ENTRIES_THIS_DISK = 3
85_ECD_ENTRIES_TOTAL = 4
86_ECD_SIZE = 5
87_ECD_OFFSET = 6
88_ECD_COMMENT_SIZE = 7
89# These last two indices are not part of the structure as defined in the
90# spec, but they are used internally by this module as a convenience
91_ECD_COMMENT = 8
92_ECD_LOCATION = 9
93
94# The "central directory" structure, magic number, size, and indices
95# of entries in the structure (section V.F in the format document)
96structCentralDir = "<4s4B4HL2L5H2L"
Georg Brandl2ee470f2008-07-16 12:55:28 +000097stringCentralDir = b"PK\001\002"
Martin v. Löwisb09b8442008-07-03 14:13:42 +000098sizeCentralDir = struct.calcsize(structCentralDir)
99
Fred Drake3e038e52001-02-28 17:56:26 +0000100# indexes of entries in the central directory structure
101_CD_SIGNATURE = 0
102_CD_CREATE_VERSION = 1
103_CD_CREATE_SYSTEM = 2
104_CD_EXTRACT_VERSION = 3
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000105_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +0000106_CD_FLAG_BITS = 5
107_CD_COMPRESS_TYPE = 6
108_CD_TIME = 7
109_CD_DATE = 8
110_CD_CRC = 9
111_CD_COMPRESSED_SIZE = 10
112_CD_UNCOMPRESSED_SIZE = 11
113_CD_FILENAME_LENGTH = 12
114_CD_EXTRA_FIELD_LENGTH = 13
115_CD_COMMENT_LENGTH = 14
116_CD_DISK_NUMBER_START = 15
117_CD_INTERNAL_FILE_ATTRIBUTES = 16
118_CD_EXTERNAL_FILE_ATTRIBUTES = 17
119_CD_LOCAL_HEADER_OFFSET = 18
120
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000121# The "local file header" structure, magic number, size, and indices
122# (section V.A in the format document)
123structFileHeader = "<4s2B4HL2L2H"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000124stringFileHeader = b"PK\003\004"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000125sizeFileHeader = struct.calcsize(structFileHeader)
126
Fred Drake3e038e52001-02-28 17:56:26 +0000127_FH_SIGNATURE = 0
128_FH_EXTRACT_VERSION = 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000129_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000130_FH_GENERAL_PURPOSE_FLAG_BITS = 3
131_FH_COMPRESSION_METHOD = 4
132_FH_LAST_MOD_TIME = 5
133_FH_LAST_MOD_DATE = 6
134_FH_CRC = 7
135_FH_COMPRESSED_SIZE = 8
136_FH_UNCOMPRESSED_SIZE = 9
137_FH_FILENAME_LENGTH = 10
138_FH_EXTRA_FIELD_LENGTH = 11
139
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000140# The "Zip64 end of central directory locator" structure, magic number, and size
Georg Brandl2ee470f2008-07-16 12:55:28 +0000141structEndArchive64Locator = "<4sLQL"
142stringEndArchive64Locator = b"PK\x06\x07"
143sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000144
145# The "Zip64 end of central directory" record, magic number, size, and indices
146# (section V.G in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000147structEndArchive64 = "<4sQ2H2L4Q"
148stringEndArchive64 = b"PK\x06\x06"
149sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000150
151_CD64_SIGNATURE = 0
152_CD64_DIRECTORY_RECSIZE = 1
153_CD64_CREATE_VERSION = 2
154_CD64_EXTRACT_VERSION = 3
155_CD64_DISK_NUMBER = 4
156_CD64_DISK_NUMBER_START = 5
157_CD64_NUMBER_ENTRIES_THIS_DISK = 6
158_CD64_NUMBER_ENTRIES_TOTAL = 7
159_CD64_DIRECTORY_SIZE = 8
160_CD64_OFFSET_START_CENTDIR = 9
161
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An OSError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        found = bool(_EndRecData(fp))   # a record means the magic number matched
    except OSError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000169
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path, or an object with a ``read``
    attribute, in which case it is probed directly as an open file.
    Any OSError raised along the way is swallowed.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as handle:
                verdict = _check_zipfile(handle)
        else:
            verdict = _check_zipfile(fp=filename)
    except OSError:
        pass
    return verdict
185
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    *offset* is the (negative) position of the classic end-of-central-directory
    record relative to the end of the file.  *endrec* is returned unchanged
    whenever the ZIP64 locator or record is absent, truncated or carries the
    wrong signature; otherwise it is updated in place and returned.

    Raises BadZipFile if the locator describes a multi-disk archive.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # The file is too small to hold a ZIP64 locator in front of the
        # classic record, so there is nothing to merge.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    magic, disk_no, _rel_offset, disk_count = struct.unpack(
        structEndArchive64Locator, locator)
    if magic != stringEndArchive64Locator:
        return endrec
    if disk_no != 0 or disk_count != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data': the record sits directly in front
    # of the locator.
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    fields = struct.unpack(structEndArchive64, record)
    if fields[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Overwrite the classic values with their ZIP64 counterparts.
    endrec[_ECD_SIGNATURE] = fields[_CD64_SIGNATURE]
    endrec[_ECD_DISK_NUMBER] = fields[_CD64_DISK_NUMBER]
    endrec[_ECD_DISK_START] = fields[_CD64_DISK_NUMBER_START]
    endrec[_ECD_ENTRIES_THIS_DISK] = fields[_CD64_NUMBER_ENTRIES_THIS_DISK]
    endrec[_ECD_ENTRIES_TOTAL] = fields[_CD64_NUMBER_ENTRIES_TOTAL]
    endrec[_ECD_SIZE] = fields[_CD64_DIRECTORY_SIZE]
    endrec[_ECD_OFFSET] = fields[_CD64_OFFSET_START_CENTDIR]
    return endrec
227
228
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list: the unpacked fields of the ZIP "End of central
    dir" record, followed by the archive comment (bytes) and finally the
    file offset at which the record starts.  The list is passed through
    _EndRecData64() so ZIP64 values replace the classic ones when present.
    """
    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: an archive without a comment ends with the record itself.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    no_comment = (len(tail) == sizeEndCentDir
                  and tail[0:4] == stringEndArchive
                  and tail[-2:] == b"\000\000")
    if no_comment:
        # Signature matches and the comment length is zero.
        endrec = list(struct.unpack(structEndArchive, tail))
        endrec += [b"", filesize - sizeEndCentDir]
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Slow path: either this is not a ZIP file or it carries a comment.
    # The comment is the last item in the file and may be up to 64K long,
    # so scan that much of the tail for the record signature.  It is
    # assumed the signature does not occur inside the comment itself.
    search_start = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(search_start, 0)
    window = fpin.read()
    hit = window.rfind(stringEndArchive)
    if hit < 0:
        # Unable to find a valid end of central directory structure
        return None

    raw = window[hit:hit + sizeEndCentDir]
    if len(raw) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, raw))
    # The comment length is whatever the archive claims it to be.
    comment_end = hit + sizeEndCentDir + endrec[_ECD_COMMENT_SIZE]
    endrec.append(window[hit + sizeEndCentDir:comment_end])
    endrec.append(search_start + hit)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, search_start + hit - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000288
Fred Drake484d7352000-10-02 21:14:52 +0000289
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry description for *filename*.

        *date_time* is a (year, month, day, hour, min, sec) sequence.
        Raises ValueError if the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Sizes default to zero so that repr() and is_dir()-style queries
        # work on an instance that has not been attached to an archive yet.
        self.compress_size = 0          # Size of the compressed file
        self.file_size = 0              # Size of the uncompressed file
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file

    def __repr__(self):
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The upper 16 bits hold the Unix mode, the lower 16 the DOS flags.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file (local) header as bytes.

        *zip64* forces (True) or forbids (False) the ZIP64 extra record;
        None decides from the sizes.  Raises LargeZipFile when a size
        exceeds ZIP64_LIMIT while *zip64* is false.  As a side effect the
        extract_version and create_version attributes are raised to the
        minimum version the header requires.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        ASCII is used when possible; otherwise the name is encoded as UTF-8
        and bit 0x800 is added to the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Parse self.extra and apply any ZIP64 record (type 0x0001).

        Sizes and header offset that hold the 32-bit sentinel value are
        replaced by the 64-bit values stored in the record.  Raises
        BadZipFile if a record is truncated, has an unexpected length, or
        carries fewer values than the sentinels require.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                # ZIP64 extension (large files and/or large archives).
                # The values appear in a fixed order, but only for the
                # fields whose 32-bit slot holds the sentinel; a record
                # that is too short for the sentinels present is corrupt.
                try:
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        self.file_size = counts[idx]
                        idx += 1

                    if self.compress_size == 0xFFFFFFFF:
                        self.compress_size = counts[idx]
                        idx += 1

                    if self.header_offset == 0xffffffff:
                        self.header_offset = counts[idx]
                        idx += 1
                except IndexError:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" %
                                     (tp, ln)) from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        # endswith() rather than [-1] so an empty name is simply "not a dir".
        return self.filename.endswith('/')
508
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000509
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300510# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
511# internal keys. We noticed that a direct implementation is faster than
512# relying on binascii.crc32().
Thomas Wouterscf297e42007-02-23 15:07:44 +0000513
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300514_crctable = None
def _gen_crc(crc):
    """Run eight shift/xor rounds of the reflected CRC-32 polynomial on *crc*.

    Used to build one entry of the 256-entry CRC lookup table.
    """
    for _ in range(8):
        low_bit = crc & 1
        crc >>= 1
        if low_bit:
            crc ^= 0xEDB88320
    return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000522
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300523# ZIP supports a password-based form of encryption. Even though known
524# plaintext attacks have been found against it, it is still useful
525# to be able to get data out of such a file.
526#
527# Usage:
528# zd = _ZipDecrypter(mypwd)
529# plain_bytes = zd(cypher_bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000530
def _ZipDecrypter(pwd):
    """Return a function that decrypts bytes using keys seeded from *pwd*.

    *pwd* is iterated item by item to initialise three 32-bit keys; the
    returned callable keeps updating those keys, so successive calls
    continue the same stream.
    """
    global _crctable
    if _crctable is None:
        # Built lazily on first use and shared by every decrypter.
        _crctable = [_gen_crc(n) for n in range(256)]
    table = _crctable

    key0, key1, key2 = 305419896, 591751049, 878082192

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ table[(crc ^ ch) & 0xFF]

    def update_keys(c):
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        # Both steps are reduced modulo 2**32, so one final mask suffices.
        key1 = ((key1 + (key0 & 0xFF)) * 134775813 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    for item in pwd:
        update_keys(item)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        for c in data:
            k = key2 | 2
            c ^= ((k * (k ^ 1)) >> 8) & 0xFF
            # Keys advance on the decrypted byte, not the cipher byte.
            update_keys(c)
            plain.append(c)
        return bytes(plain)

    return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000567
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200568
class LZMACompressor:
    """Raw LZMA1 compressor whose first output is prefixed with a small header.

    The header is the packed '<BBH' triple (9, 4, len(props)) followed by
    the encoded filter properties; it is emitted once, by whichever of
    compress() or flush() is called first.
    """

    def __init__(self):
        # The underlying compressor is created lazily, together with the header.
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*, prepending the header on the first call."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream, prepending the header if nothing was written."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.flush()
590
591
class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by a 4-byte header and
    the filter properties.

    Input is buffered until the header and the properties (whose length is
    the little-endian 16-bit value at bytes 2-3) plus at least one further
    byte have arrived; only then is the underlying decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Feed *data* and return whatever can be decompressed so far."""
        if self._decomp is None:
            self._unconsumed += data
            buffered = self._unconsumed
            if len(buffered) <= 4:
                return b''
            (psize,) = struct.unpack('<H', buffered[2:4])
            body_start = 4 + psize
            if len(buffered) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, buffered[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            # Everything after the properties is compressed payload.
            data = buffered[body_start:]
            del self._unconsumed

        output = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return output
618
619
620compressor_names = {
621 0: 'store',
622 1: 'shrink',
623 2: 'reduce',
624 3: 'reduce',
625 4: 'reduce',
626 5: 'reduce',
627 6: 'implode',
628 7: 'tokenize',
629 8: 'deflate',
630 9: 'deflate64',
631 10: 'implode',
632 12: 'bzip2',
633 14: 'lzma',
634 18: 'terse',
635 19: 'lz77',
636 97: 'wavpack',
637 98: 'ppmd',
638}
639
def _check_compression(compression):
    """Validate that *compression* is a usable method.

    Raises RuntimeError if the method is known but its module could not be
    imported, and NotImplementedError for any other method.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        return
    if compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
        return
    if compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
        return
    raise NotImplementedError("That compression method is not supported")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200657
658
def _get_compressor(compress_type, compresslevel=None):
    """Return a new compressor object for *compress_type*, or None.

    *compresslevel* is forwarded to the deflate and bzip2 compressors when
    given; it is ignored for ZIP_LZMA.  Any other type yields None.
    """
    if compress_type == ZIP_DEFLATED:
        level = (zlib.Z_DEFAULT_COMPRESSION if compresslevel is None
                 else compresslevel)
        # Negative wbits: the output carries no zlib header or trailer.
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()
    return None
673
674
def _get_decompressor(compress_type):
    """Return a fresh decompressor object for *compress_type*.

    Returns None for ZIP_STORED (nothing to decompress).  Raises
    NotImplementedError for any method other than ZIP_STORED, ZIP_DEFLATED,
    ZIP_BZIP2 and ZIP_LZMA; the message includes the method's display name
    when compressor_names has one.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # Negative wbits: the member holds a raw deflate stream.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    method_name = compressor_names.get(compress_type)
    if not method_name:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError(
        "compression type %d (%s)" % (compress_type, method_name))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200690
691
class _SharedFile:
    """Handle onto a file object that is shared with other handles.

    Each handle remembers its own logical position (``_pos``) and, under
    the shared lock, re-seeks the underlying file to that position before
    every read, so several handles can interleave reads on one file.

    file:    the underlying file object.
    pos:     initial logical position of this handle.
    close:   callable invoked with the file object when this handle is
             closed.
    lock:    context manager serialising access to the underlying file.
    writing: zero-argument callable; while it returns true, reading and
             repositioning are refused with ValueError.
    """

    def __init__(self, file, pos, close, lock, writing):
        self._file = file
        self._pos = pos
        self._close = close
        self._lock = lock
        self._writing = writing
        self.seekable = file.seekable

    def tell(self):
        """Return this handle's own position.

        The underlying file's tell() is not used because other handles
        sharing the file may have moved it.
        """
        return self._pos

    def seek(self, offset, whence=0):
        """Reposition this handle and return the new absolute position.

        Raises ValueError while a writing handle is open on the ZIP file.
        """
        with self._lock:
            if self._writing():
                raise ValueError("Can't reposition in the ZIP file while "
                        "there is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            if whence == os.SEEK_CUR:
                # "Current" must mean this handle's position, not wherever
                # another handle last left the shared file.
                self._file.seek(self._pos + offset)
            else:
                self._file.seek(offset, whence)
            self._pos = self._file.tell()
            return self._pos

    def read(self, n=-1):
        """Read up to n bytes starting at this handle's position.

        Raises ValueError while a writing handle is open on the ZIP file.
        """
        with self._lock:
            if self._writing():
                raise ValueError("Can't read from the ZIP file while there "
                        "is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            self._file.seek(self._pos)
            data = self._file.read(n)
            self._pos = self._file.tell()
            return data

    def close(self):
        """Release the underlying file via the close callback (once)."""
        if self._file is not None:
            fileobj = self._file
            # Drop the reference first so a second close() is a no-op.
            self._file = None
            self._close(fileobj)
728
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200729# Provide the tell method for unseekable stream
class _Tellable:
    """Wrap a write-only stream and track how many bytes were written.

    tell() reports the running total returned by the wrapped object's
    write() calls, so callers can ask for a position even when the wrapped
    stream cannot provide one itself.
    """

    def __init__(self, fp):
        self.fp = fp
        self.offset = 0

    def tell(self):
        """Return the number of bytes written through this wrapper."""
        return self.offset

    def write(self, data):
        """Write data to the wrapped stream and advance the offset."""
        written = self.fp.write(data)
        self.offset += written
        return written

    def flush(self):
        """Flush the wrapped stream."""
        self.fp.flush()

    def close(self):
        """Close the wrapped stream."""
        self.fp.close()
748
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200749
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().

    Data is pulled from the underlying file object in compressed chunks,
    optionally decrypted, decompressed, and checked against the member's
    CRC-32 once the end of the member is reached.  Seeking is supported
    when the underlying file object is seekable; seeking backwards restarts
    decompression from the beginning of the member.
    """

    # Max size supported by decompressor.
    # NOTE(review): "-" binds tighter than "<<", so this evaluates to
    # 1 << 30, not (1 << 31) - 1.  Value intentionally left unchanged.
    MAX_N = 1 << 31 - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Prepare to read the member described by zipinfo from fileobj.

        fileobj is expected to be positioned at the start of the member's
        data.  decrypter, when given, is a callable applied to each raw
        chunk.  If close_fileobj is true, fileobj is closed together with
        this object.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size  # raw bytes still to read
        self._left = zipinfo.file_size  # uncompressed bytes still to deliver

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0  # read position inside _readbuffer

        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        # Remember the starting state so seek() can rewind.  An
        # AttributeError (fileobj without seekable()/tell(), or no running
        # CRC because zipinfo had no CRC) leaves the object non-seekable.
        self._seekable = False
        try:
            if fileobj.seekable():
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        A limit of None or a negative number means no limit.
        """

        if limit is None or limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Push the chunk back in front of whatever is still buffered.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Fully satisfied from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)  # bytes still needed beyond the buffer
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than requested: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            # Loop only until the first non-empty chunk arrives.
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read (and decrypt) one raw chunk of at least MIN_READ_SIZE bytes,
        # capped at what remains of the member's compressed data.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            # The file ended before the declared compressed size was read.
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        return self._seekable

    def seek(self, offset, whence=0):
        """Move to a position in the uncompressed data; return that position.

        The target is clamped to the range [0, file size].  Raises
        io.UnsupportedOperation if the underlying stream is not seekable
        and ValueError for an unknown whence.
        """
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            # NOTE(review): the decrypter is not reset here and
            # _compress_left is restored without the 12-byte adjustment made
            # in __init__; rewinding an encrypted member presumably needs
            # both - confirm before relying on it.
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            # Call the module-level helper directly: there is no name
            # "zipfile" inside this module.
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos

        # Advance by reading (and discarding) in bounded chunks.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        """Return the current position in the uncompressed data."""
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        # Delivered so far = total - not yet decompressed - still buffered.
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos
1047
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001048
class _ZipWriteFile(io.BufferedIOBase):
    """Write-only file object for adding one member to an open ZipFile.

    Bytes written are CRC'd, optionally compressed, and appended to the
    archive's file object.  close() finalises the member: it flushes the
    compressor, records sizes and CRC on the ZipInfo, writes them to the
    archive, registers the member with the ZipFile and clears the ZipFile's
    "writing" flag.
    """

    def __init__(self, zf, zinfo, zip64):
        """zf is the owning ZipFile, zinfo the member's ZipInfo, and zip64
        tells whether the member's header uses the ZIP64 layout."""
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0
        self._compress_size = 0
        self._crc = 0

    @property
    def _fileobj(self):
        # Looked up on every access rather than cached at construction.
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Write data to the member; return the number of bytes consumed.

        Raises ValueError if this file object is already closed.
        """
        if self.closed:
            raise ValueError('I/O operation on closed file.')

        # Accept any object supporting the buffer protocol and count its
        # size in bytes; len() would count items for e.g. array('I').
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes

        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        """Finish the member and register it with the owning ZipFile.

        Raises RuntimeError if a size exceeded ZIP64_LIMIT although the
        member was not started as ZIP64.  The ZipFile's writing flag is
        cleared even when finalisation fails, so the archive object does
        not stay locked.
        """
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                fmt = '<LQQ' if self._zip64 else '<LLL'
                self._fileobj.write(struct.pack(fmt, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError('File size unexpectedly exceeded ZIP64 '
                                           'limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError('Compressed size unexpectedly exceeded '
                                           'ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            self._zipfile._writing = False
1122 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1123
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001124class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001125 """ Class with methods to open, read, write, close, list zip files.
1126
Bo Baylesce237c72018-01-29 23:54:07 -06001127 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1128 compresslevel=None)
Tim Petersa19a1682001-03-29 04:36:09 +00001129
Fred Drake3d9091e2001-03-26 15:49:24 +00001130 file: Either the path to the file, or a file-like object.
1131 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001132 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1133 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001134 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1135 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001136 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1137 needed, otherwise it will raise an exception when this would
1138 be necessary.
Bo Baylesce237c72018-01-29 23:54:07 -06001139 compresslevel: None (default for the given compression type) or an integer
1140 specifying the level to pass to the compressor.
1141 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1142 When using ZIP_DEFLATED integers 0 through 9 are accepted.
1143 When using ZIP_BZIP2 integers 1 through 9 are accepted.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001144
Fred Drake3d9091e2001-03-26 15:49:24 +00001145 """
Fred Drake484d7352000-10-02 21:14:52 +00001146
Fred Drake90eac282001-02-28 05:29:34 +00001147 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001148 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001149
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
             compresslevel=None):
    """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
    or append 'a'.

    file may be a path (str or os.PathLike), which is opened here, or an
    already-open file-like object, which is used as given.  Raises
    ValueError for an unknown mode; whatever _check_compression raises for
    an unusable compression method; and, after closing the file again, any
    error hit while reading or positioning the archive.
    """
    if mode not in ('r', 'w', 'x', 'a'):
        raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

    _check_compression(compression)

    self.mode = mode
    self.compression = compression  # Method of compression
    self.compresslevel = compresslevel
    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}  # Find file info given name
    self.filelist = []  # List of ZipInfo instances for archive
    self.pwd = None
    self._comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, os.PathLike):
        file = os.fspath(file)
    if not isinstance(file, str):
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    else:
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        # Maps each archive mode to the first file mode to try, and each
        # file mode to the one to fall back to if opening raises OSError.
        fallback_modes = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                          'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
        open_mode = fallback_modes[mode]
        while True:
            try:
                self.fp = io.open(file, open_mode)
                break
            except OSError:
                if open_mode not in fallback_modes:
                    raise
                open_mode = fallback_modes[open_mode]
    self._fileRefCnt = 1
    self._lock = threading.RLock()
    self._seekable = True
    self._writing = False

    try:
        if mode == 'r':
            self._RealGetContents()
        elif mode in ('w', 'x'):
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
            try:
                self.start_dir = self.fp.tell()
            except (AttributeError, OSError):
                # No usable tell(): count written bytes ourselves.
                self.fp = _Tellable(self.fp)
                self.start_dir = 0
                self._seekable = False
            else:
                # Some file-like objects can provide tell() but not seek()
                try:
                    self.fp.seek(self.start_dir)
                except (AttributeError, OSError):
                    self._seekable = False
        elif mode == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                self.start_dir = self.fp.tell()
        else:
            raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
    except:
        # Release the file on any failure, then let the error propagate.
        failed_fp = self.fp
        self.fp = None
        self._fpclose(failed_fp)
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001238
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is the target."""
    return self
1241
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    The exception details are ignored and nothing is returned, so an
    exception raised inside the block keeps propagating.
    """
    self.close()
1244
def __repr__(self):
    """Return '<module.Class ...>' describing the archive.

    An open archive shows the file object (when one was passed in) or else
    the filename (when known), followed by the mode; a closed archive shows
    '[closed]'.
    """
    cls = self.__class__
    parts = ['<%s.%s' % (cls.__module__, cls.__qualname__)]
    if self.fp is None:
        parts.append(' [closed]')
    else:
        if self._filePassed:
            parts.append(' file=%r' % self.fp)
        elif self.filename is not None:
            parts.append(' filename=%r' % self.filename)
        parts.append(' mode=%r' % self.mode)
    parts.append('>')
    return ''.join(parts)
1258
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record, then parses every central
    directory entry into a ZipInfo that is appended to self.filelist and
    indexed in self.NameToInfo.  Also sets self._comment and
    self.start_dir.

    Raises BadZipFile if the end record cannot be found or an entry is
    truncated or has a bad signature, and NotImplementedError if an entry
    needs a newer ZIP version than MAX_EXTRACT_VERSION.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)

    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)

    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    # Parse the whole directory from an in-memory copy.
    directory = io.BytesIO(fp.read(size_cd))

    total = 0
    while total < size_cd:
        raw_entry = directory.read(sizeCentralDir)
        if len(raw_entry) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, raw_entry)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)

        name_len = centdir[_CD_FILENAME_LENGTH]
        extra_len = centdir[_CD_EXTRA_FIELD_LENGTH]
        comment_len = centdir[_CD_COMMENT_LENGTH]

        filename = directory.read(name_len)
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')

        # Create ZipInfo instance to store file information
        info = ZipInfo(filename)
        info.extra = directory.read(extra_len)
        info.comment = directory.read(comment_len)
        info.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (info.create_version, info.create_system, info.extract_version,
         info.reserved, info.flag_bits, info.compress_type, t, d,
         info.CRC, info.compress_size, info.file_size) = centdir[1:12]
        if info.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (info.extract_version / 10))
        info.volume, info.internal_attr, info.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        info._raw_time = t
        info.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                           t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        # Must run before the concat shift below, which is applied to
        # whatever header_offset holds afterwards.
        info._decodeExtra()
        info.header_offset = info.header_offset + concat
        self.filelist.append(info)
        self.NameToInfo[info.filename] = info

        # update total bytes read from central directory
        total = total + sizeCentralDir + name_len + extra_len + comment_len

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001335
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001336
def namelist(self):
    """Return a list of file names in the archive."""
    names = []
    for zinfo in self.filelist:
        names.append(zinfo.filename)
    return names
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001340
def infolist(self):
    """Return a list of class ZipInfo instances for files in the archive.

    This is the archive's own list object, not a copy.
    """
    members = self.filelist
    return members
1345
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header row is printed, followed by one row per member giving its
    name, modification timestamp and uncompressed size.  Output goes to
    'file' (passed straight through to print()).
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001354
1355 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001356 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001357 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001358 for zinfo in self.filelist:
1359 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001360 # Read by chunks, to avoid an OverflowError or a
1361 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001362 with self.open(zinfo.filename, "r") as f:
1363 while f.read(chunk_size): # Check CRC-32
1364 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001365 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001366 return zinfo.filename
1367
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError when the archive has no entry with that name.
    """
    entry = self.NameToInfo.get(name)
    if entry is not None:
        return entry
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001376
def setpassword(self, pwd):
    """Set default password for encrypted files.

    'pwd' must be a bytes object; any falsy value (None, b"") clears the
    default password.  Raises TypeError for a non-empty non-bytes value.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001385
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Only bytes are accepted; anything else is rejected up front.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
    # Over-long comments are truncated (with a warning) rather than refused.
    limit = ZIP_MAX_COMMENT
    if len(comment) > limit:
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % limit, stacklevel=2)
        comment = comment[:limit]
    self._comment = comment
    # Mark the archive as changed so close() rewrites the ending records.
    self._didModify = True
1403
def read(self, name, pwd=None):
    """Return everything read from the archive member 'name'.

    The member is opened in mode "r" with the optional password 'pwd' and
    read to the end in a single call.
    """
    with self.open(name, "r", pwd) as member:
        return member.read()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001408
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Return file-like object for 'name'.

    name is a string for the file name within the ZIP file, or a ZipInfo
    object.

    mode should be 'r' to read a file already in the ZIP file, or 'w' to
    write to a file newly added to the archive.

    pwd is the password to decrypt files (only used for reading).

    When writing, if the file size is not known in advance but may exceed
    2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
    files.  If the size is known in advance, it is best to pass a ZipInfo
    instance for name, with zinfo.file_size set.

    Raises ValueError for an unsupported mode, for a password given with
    mode 'w', for a closed archive, or when reading while a write handle
    is open; TypeError when pwd is not bytes; KeyError (via getinfo) for
    an unknown member name; BadZipFile for a truncated or inconsistent
    local file header; NotImplementedError for flag bits 5 and 6;
    RuntimeError for a missing or wrong password.
    """
    # Argument validation happens before the archive is touched.
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    if pwd and (mode == "w"):
        raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    # Make sure we have an info object
    if isinstance(name, ZipInfo):
        # 'name' is already an info object
        zinfo = name
    elif mode == 'w':
        # New member: inherit the archive-wide compression settings.
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
    else:
        # Get info object for name
        zinfo = self.getinfo(name)

    # Writing is delegated entirely; everything below is the read path.
    if mode == 'w':
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                "is an open writing handle on it. "
                "Close the writing handle before trying to read.")

    # Open for reading:
    # Count the new reader; the matching decrement is in self._fpclose,
    # which is handed to the shared-file wrapper as its close callback.
    self._fileRefCnt += 1
    zef_file = _SharedFile(self.fp, zinfo.header_offset,
                           self._fpclose, self._lock, lambda: self._writing)
    try:
        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, fheader)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        # The name is kept for the consistency check below; the extra
        # field is read only to advance past it.
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        # The local header must name the same member as the directory
        # entry we were given.
        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            # An explicit pwd argument wins over the archive default.
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = zd(header[0:12])
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file %r" % name)

        return ZipExtFile(zef_file, mode, zinfo, zd, True)
    except:
        # On any failure, close the shared-file wrapper before
        # propagating the error unchanged.
        zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001522
def _open_to_write(self, zinfo, force_zip64=False):
    """Write the local header for 'zinfo' and return a write handle.

    Raises ValueError when force_zip64 is requested on an archive opened
    without allowZip64, or when another write handle is still open.
    """
    if force_zip64 and not self._allowZip64:
        raise ValueError(
            "force_zip64 is True, but allowZip64 was False when opening "
            "the ZIP file."
        )
    if self._writing:
        raise ValueError("Can't write to the ZIP file while there is "
                         "another write handle open on it. "
                         "Close the first handle before opening another.")

    # Sizes and CRC are overwritten with correct data after processing the file
    if not hasattr(zinfo, 'file_size'):
        zinfo.file_size = 0
    zinfo.compress_size = 0
    zinfo.CRC = 0

    # Flag bits are rebuilt from scratch for the new entry.
    zinfo.flag_bits = 0x00
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        zinfo.flag_bits = zinfo.flag_bits | 0x02
    if not self._seekable:
        zinfo.flag_bits = zinfo.flag_bits | 0x08

    if not zinfo.external_attr:
        # permissions: ?rw-------
        zinfo.external_attr = 0o600 << 16

    # Compressed size can be larger than uncompressed size
    zip64 = self._allowZip64 and (
        force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

    # The new member goes where the central directory currently starts.
    if self._seekable:
        self.fp.seek(self.start_dir)
    zinfo.header_offset = self.fp.tell()

    self._writecheck(zinfo)
    self._didModify = True

    local_header = zinfo.FileHeader(zip64)
    self.fp.write(local_header)

    self._writing = True
    return _ZipWriteFile(self, zinfo, zip64)
1565
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a ZipInfo object. You can
       specify a different directory using `path'.

       Returns whatever self._extract_member() returns.
    """
    destination = os.getcwd() if path is None else os.fspath(path)
    return self._extract_member(member, destination, pwd)
1578
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
       directory. `path' specifies a different directory to extract to.
       `members' is optional and must be a subset of the list returned
       by namelist().
    """
    targets = self.namelist() if members is None else members
    destination = os.getcwd() if path is None else os.fspath(path)

    for entry in targets:
        self._extract_member(entry, destination, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001595
@classmethod
def _sanitize_windows_name(cls, arcname, pathsep):
    """Replace bad characters and remove trailing dots from parts."""
    # The translation table is built on first use and cached on the class.
    trans = cls._windows_illegal_name_trans_table
    if not trans:
        bad_chars = ':<>|"?*'
        trans = str.maketrans(bad_chars, '_' * len(bad_chars))
        cls._windows_illegal_name_trans_table = trans

    kept = []
    for part in arcname.translate(trans).split(pathsep):
        # remove trailing dots
        part = part.rstrip('.')
        # rejoin, removing empty parts.
        if part:
            kept.append(part)
    return pathsep.join(kept)
1610
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

       'member' may also be a member name, which is resolved through
       getinfo().  Returns the normalized path that was created or
       written.  Directory creation tolerates another thread or process
       creating the same directory at the same time.
    """
    if not isinstance(member, ZipInfo):
        member = self.getinfo(member)

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    invalid_path_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                               if x not in invalid_path_parts)
    if os.path.sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, os.path.sep)

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        # exist_ok closes the window between the exists() check and the
        # creation: a concurrent extractor may create the directory first.
        os.makedirs(upperdirs, exist_ok=True)

    if member.is_dir():
        if not os.path.isdir(targetpath):
            try:
                os.mkdir(targetpath)
            except FileExistsError:
                # Lost a creation race: fine if it is a directory now,
                # a genuine conflict (e.g. a regular file) otherwise.
                if not os.path.isdir(targetpath):
                    raise
        return targetpath

    with self.open(member, pwd=pwd) as source, \
         open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1652
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    A duplicate member name only produces a warning.  A read-only mode or
    a closed archive raises ValueError.  When ZIP64 is not allowed, limits
    that would need it raise LargeZipFile.
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ('w', 'x', 'a'):
        raise ValueError("write() requires mode 'w', 'x', or 'a'")
    if not self.fp:
        raise ValueError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)

    # The remaining checks only matter when ZIP64 is switched off.
    if self._allowZip64:
        return
    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        reason = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        reason = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        reason = "Zipfile size"
    else:
        return
    raise LargeZipFile(reason +
                       " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001675
def write(self, filename, arcname=None,
          compress_type=None, compresslevel=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    compress_type and compresslevel override the archive-wide defaults
    for this member when given.  A directory is recorded as a header-only
    entry; a regular file is streamed through open(zinfo, 'w').
    Raises ValueError when the archive is closed or a write handle is
    already open.
    """
    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname)

    if not zinfo.is_dir():
        # Regular file: settle the compression settings, then copy.
        zinfo.compress_type = (
            self.compression if compress_type is None else compress_type)
        zinfo._compresslevel = (
            self.compresslevel if compresslevel is None else compresslevel)
        with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
            shutil.copyfileobj(src, dest, 1024*8)
        return

    # Directory: no payload, so only a local header is written.
    zinfo.compress_size = 0
    zinfo.CRC = 0
    with self._lock:
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        # The central directory now starts after the header just written.
        self.start_dir = self.fp.tell()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001723
def writestr(self, zinfo_or_arcname, data,
             compress_type=None, compresslevel=None):
    """Write a file into the archive.  The content is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    compress_type and compresslevel, when given, override the values
    carried by the ZipInfo (or the archive defaults used for a plain
    name).  Raises ValueError when the archive is closed or a write
    handle is already open.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
        # endswith() rather than [-1]: an empty name must not raise
        # IndexError here.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    if compresslevel is not None:
        zinfo._compresslevel = compresslevel

    zinfo.file_size = len(data)            # Uncompressed size
    with self._lock:
        with self.open(zinfo, mode='w') as dest:
            dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001764
def __del__(self):
    """Finalizer: invoke close() in case the user never did."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001768
def close(self):
    """Close the file, and for mode 'w', 'x' and 'a' write the ending
    records.

    Does nothing when the archive is already closed.  Raises ValueError
    while a write handle is still open.
    """
    if self.fp is None:
        return

    if self._writing:
        raise ValueError("Can't close the ZIP file while there is "
                         "an open writing handle on it. "
                         "Close the writing handle before closing the zip.")

    try:
        # write ending records
        needs_trailer = self.mode in ('w', 'x', 'a') and self._didModify
        if needs_trailer:
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        # Detach the handle first so the archive reads as closed even if
        # releasing it fails.
        handle, self.fp = self.fp, None
        self._fpclose(handle)
1790
def _write_end_record(self):
    """Write the central directory and the end-of-archive records to self.fp.

    One central-directory entry is written per item in self.filelist,
    followed by the ZIP64 end records (only when a limit is exceeded) and
    the regular end-of-archive record plus the archive comment.  Raises
    LargeZipFile when ZIP64 records are needed but self._allowZip64 is
    false.
    """
    for zinfo in self.filelist:         # write central directory
        # Pack the date/time tuple into the two bit-field integers; the
        # seconds are stored halved.
        dt = zinfo.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        # Values too large for the fixed 32-bit fields are collected here
        # and emitted in a ZIP64 extra field; the fixed field then holds
        # the 0xffffffff placeholder.
        extra = []
        if zinfo.file_size > ZIP64_LIMIT \
           or zinfo.compress_size > ZIP64_LIMIT:
            extra.append(zinfo.file_size)
            extra.append(zinfo.compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
        else:
            file_size = zinfo.file_size
            compress_size = zinfo.compress_size

        if zinfo.header_offset > ZIP64_LIMIT:
            extra.append(zinfo.header_offset)
            header_offset = 0xffffffff
        else:
            header_offset = zinfo.header_offset

        extra_data = zinfo.extra
        min_version = 0
        if extra:
            # Append a ZIP64 field to the extra's
            # (header id 1, payload of 8 bytes per collected value; it is
            # placed in front of the member's existing extra data)
            extra_data = struct.pack(
                '<HH' + 'Q'*len(extra),
                1, 8*len(extra), *extra) + extra_data

            min_version = ZIP64_VERSION

        # The compression method can raise the minimum version further.
        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
        except DeprecationWarning:
            # Debugging aid: dump the values being packed to stderr, then
            # let the exception propagate.
            print((structCentralDir, stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset), file=sys.stderr)
            raise
        # Fixed-size record first, then its variable-length parts.
        self.fp.write(centdir)
        self.fp.write(filename)
        self.fp.write(extra_data)
        self.fp.write(zinfo.comment)

    # pos2 is the position right after the last central-directory entry.
    pos2 = self.fp.tell()
    # Write end-of-zip-archive record
    centDirCount = len(self.filelist)
    centDirSize = pos2 - self.start_dir
    centDirOffset = self.start_dir
    requires_zip64 = None
    if centDirCount > ZIP_FILECOUNT_LIMIT:
        requires_zip64 = "Files count"
    elif centDirOffset > ZIP64_LIMIT:
        requires_zip64 = "Central directory offset"
    elif centDirSize > ZIP64_LIMIT:
        requires_zip64 = "Central directory size"
    if requires_zip64:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
        zip64endrec = struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, centDirCount, centDirCount,
            centDirSize, centDirOffset)
        self.fp.write(zip64endrec)

        # The locator records pos2, i.e. where the ZIP64 end record
        # written just above begins.
        zip64locrec = struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, pos2, 1)
        self.fp.write(zip64locrec)
        # Clamp to the field maxima for the regular end record below;
        # the true values are in the ZIP64 record.
        centDirCount = min(centDirCount, 0xFFFF)
        centDirSize = min(centDirSize, 0xFFFFFFFF)
        centDirOffset = min(centDirOffset, 0xFFFFFFFF)

    endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, centDirCount, centDirCount,
                         centDirSize, centDirOffset, len(self._comment))
    self.fp.write(endrec)
    self.fp.write(self._comment)
    self.fp.flush()
1891
def _fpclose(self, fp):
    """Drop one reference to the underlying file object 'fp'.

    'fp' is closed once the reference count reaches zero, unless
    self._filePassed is true.
    """
    assert self._fileRefCnt > 0
    remaining = self._fileRefCnt - 1
    self._fileRefCnt = remaining
    if remaining or self._filePassed:
        return
    fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001897
1898
1899class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001900 """Class to create ZIP archives with Python library files and packages."""
1901
def __init__(self, file, mode="r", compression=ZIP_STORED,
             allowZip64=True, optimize=-1):
    """Open the archive like ZipFile does and remember 'optimize'.

    'optimize' is only stored here (as self._optimize); file, mode,
    compression and allowZip64 are forwarded to ZipFile.__init__.
    """
    base_init = ZipFile.__init__
    base_init(self, file, mode=mode, compression=compression,
              allowZip64=allowZip64)
    self._optimize = optimize
1907
def writepy(self, pathname, basename="", filterfunc=None):
    """Add all files from "pathname" to the ZIP archive.

    If pathname is a package directory, search the directory and
    all package subdirectories recursively for all *.py and enter
    the modules into the archive.  If pathname is a plain
    directory, listdir *.py and enter all modules.  Else, pathname
    must be a Python *.py file and the module will be put into the
    archive.  Added modules are always module.pyc.
    This method will compile the module.py into module.pyc if
    necessary.
    If filterfunc(pathname) is given, it is called with every argument.
    When it is False, the file or directory is skipped.

    Raises RuntimeError when pathname is a single file that does not
    end with ".py".
    """
    pathname = os.fspath(pathname)
    # The filter is consulted for the top-level path first.
    if filterfunc and not filterfunc(pathname):
        if self.debug:
            label = 'path' if os.path.isdir(pathname) else 'file'
            print('%s %r skipped by filterfunc' % (label, pathname))
        return
    dir, name = os.path.split(pathname)
    if os.path.isdir(pathname):
        initname = os.path.join(pathname, "__init__.py")
        if os.path.isfile(initname):
            # This is a package directory, add it
            if basename:
                basename = "%s/%s" % (basename, name)
            else:
                basename = name
            if self.debug:
                print("Adding package in", pathname, "as", basename)
            # [0:-3] strips the ".py" suffix before asking for the
            # file to store and its archive name.
            fname, arcname = self._get_codename(initname[0:-3], basename)
            if self.debug:
                print("Adding", arcname)
            self.write(fname, arcname)
            # Sorted listing gives a deterministic order; __init__.py
            # was handled just above, so drop it from the list.
            dirlist = sorted(os.listdir(pathname))
            dirlist.remove("__init__.py")
            # Add all *.py files and package subdirectories
            for filename in dirlist:
                path = os.path.join(pathname, filename)
                root, ext = os.path.splitext(filename)
                if os.path.isdir(path):
                    if os.path.isfile(os.path.join(path, "__init__.py")):
                        # This is a package directory, add it
                        self.writepy(path, basename,
                                     filterfunc=filterfunc)  # Recursive call
                elif ext == ".py":
                    if filterfunc and not filterfunc(path):
                        if self.debug:
                            print('file %r skipped by filterfunc' % path)
                        continue
                    fname, arcname = self._get_codename(path[0:-3],
                                                        basename)
                    if self.debug:
                        print("Adding", arcname)
                    self.write(fname, arcname)
        else:
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            # Subdirectories are not descended into in this branch; only
            # the directory's own *.py files are considered.
            for filename in sorted(os.listdir(pathname)):
                path = os.path.join(pathname, filename)
                root, ext = os.path.splitext(filename)
                if ext == ".py":
                    if filterfunc and not filterfunc(path):
                        if self.debug:
                            print('file %r skipped by filterfunc' % path)
                        continue
                    fname, arcname = self._get_codename(path[0:-3],
                                                        basename)
                    if self.debug:
                        print("Adding", arcname)
                    self.write(fname, arcname)
    else:
        # A single file: it has to be Python source.
        if pathname[-3:] != ".py":
            raise RuntimeError(
                'Files added with writepy() must end with ".py"')
        fname, arcname = self._get_codename(pathname[0:-3], basename)
        if self.debug:
            print("Adding file", arcname)
        self.write(fname, arcname)
1989
def _get_codename(self, pathname, basename):
    """Pick the file to store for a module and its name inside the archive.

    *pathname* is the module path without the ".py" suffix; *basename*
    is the archive directory prefix (a false value means no prefix).
    Returns a ``(filename, archivename)`` tuple, byte-compiling the
    source when no usable cached bytecode is found.  For example, given
    /python/lib/string, return (/python/lib/string.pyc, string.pyc);
    if compilation fails the ".py" source itself is returned instead.

    Raises ValueError when ``self._optimize`` is not -1, 0, 1 or 2.
    """
    source = pathname + ".py"
    legacy_pyc = pathname + ".pyc"
    # PEP 3147 cache locations for optimization levels 0, 1 and 2.
    cached = tuple(
        importlib.util.cache_from_source(source, optimization=tag)
        for tag in ('', 1, 2)
    )

    def is_fresh(candidate):
        # Usable only if it exists and is at least as new as the source.
        return (os.path.isfile(candidate) and
                os.stat(candidate).st_mtime >= os.stat(source).st_mtime)

    def build(target, level=-1):
        # Byte-compile *target*; report (not raise) compilation errors.
        import py_compile
        if self.debug:
            print("Compiling", target)
        try:
            py_compile.compile(target, doraise=True, optimize=level)
        except py_compile.PyCompileError as err:
            print(err.msg)
            return False
        else:
            return True

    level = self._optimize
    if level == -1:
        # Legacy mode: take the first up-to-date bytecode file found,
        # always storing it under the legacy "<module>.pyc" name.
        for candidate in (legacy_pyc,) + cached:
            if is_fresh(candidate):
                stored, arcname = candidate, legacy_pyc
                break
        else:
            # Nothing usable on disk: compile at the interpreter's own
            # optimization level (anything above 1 maps to the opt-2 file).
            if build(source):
                stored = cached[min(sys.flags.optimize, 2)]
                arcname = legacy_pyc
            else:
                stored = arcname = source
    else:
        # New mode: the caller requested one specific optimization level.
        for wanted, candidate in zip((0, 1, 2), cached):
            if level == wanted:
                stored = candidate
                break
        else:
            msg = "invalid value for 'optimize': {!r}".format(level)
            raise ValueError(msg)
        arcname = legacy_pyc
        if not is_fresh(stored) and not build(source, level=level):
            # Compilation failed, so ship the source file instead.
            stored = arcname = source

    archivename = os.path.basename(arcname)
    if basename:
        archivename = "%s/%s" % (basename, archivename)
    return (stored, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002071
2072
def main(args=None):
    """Command-line front end: list, test, extract or create a ZIP archive.

    *args* is handed to argparse's ``parse_args()``.  Exactly one of
    -l/--list, -e/--extract, -c/--create or -t/--test must be supplied;
    the corresponding action is then performed on the named archive.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for zipfile module.')
    modes = parser.add_mutually_exclusive_group(required=True)
    modes.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    modes.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    modes.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    modes.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    options = parser.parse_args(args)

    if options.test is not None:
        with ZipFile(options.test, 'r') as archive:
            corrupted = archive.testzip()
        if corrupted:
            print("The following enclosed file is corrupted: {!r}".format(corrupted))
        print("Done testing")
        return

    if options.list is not None:
        with ZipFile(options.list, 'r') as archive:
            archive.printdir()
        return

    if options.extract is not None:
        archive_path, target_dir = options.extract
        with ZipFile(archive_path, 'r') as archive:
            archive.extractall(target_dir)
        return

    if options.create is None:
        return

    # First value is the archive to create, the rest are its sources.
    archive_path, *sources = options.create

    def store(archive, fs_path, arc_path):
        # Recursively add a file or directory tree under *arc_path*.
        if os.path.isfile(fs_path):
            archive.write(fs_path, arc_path, ZIP_DEFLATED)
            return
        if not os.path.isdir(fs_path):
            return  # neither a regular file nor a directory: ignore
        if arc_path:
            # Record the directory entry itself (skipped for the root).
            archive.write(fs_path, arc_path)
        for entry in sorted(os.listdir(fs_path)):
            store(archive,
                  os.path.join(fs_path, entry),
                  os.path.join(arc_path, entry))

    with ZipFile(archive_path, 'w') as archive:
        for src in sources:
            # A trailing separator leaves an empty basename; fall back
            # to the name of the containing directory in that case.
            arc_path = (os.path.basename(src)
                        or os.path.basename(os.path.dirname(src)))
            if arc_path in ('', os.curdir, os.pardir):
                arc_path = ''
            store(archive, src, arc_path)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002132
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()