blob: 7f237783773dc33548b257d4062841a5e4499558 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Brett Cannonb57a0852013-06-15 17:32:30 -04008import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import sys
10import time
11import stat
12import shutil
13import struct
14import binascii
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020015import threading
Guido van Rossum32abe6f2000-03-31 17:30:02 +000016
17try:
Tim Peterse1190062001-01-15 03:34:38 +000018 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000019 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040020except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000021 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000022 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000023
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020024try:
25 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040026except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020027 bz2 = None
28
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020029try:
30 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040031except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020032 lzma = None
33
# Names exported by "from zipfile import *".
__all__ = [
    "BadZipFile",
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "ZIP_BZIP2",
    "ZIP_LZMA",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
    "LargeZipFile",
]
Skip Montanaro40fc1602001-03-01 04:27:19 +000037
class BadZipFile(Exception):
    """Raised by this module when archive data cannot be handled as a ZIP file."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +000040
41
class LargeZipFile(Exception):
    """
    Raised while writing a zipfile when the data would need the ZIP64
    extensions but those extensions are disabled.
    """
47
# Pre-3.2 compatibility names: both are plain aliases of BadZipFile.
BadZipfile = BadZipFile
error = BadZipFile
49
Guido van Rossum32abe6f2000-03-31 17:30:02 +000050
# Thresholds of the classic (non-ZIP64) format.  ZipInfo.FileHeader()
# switches to the ZIP64 extra field once a size exceeds ZIP64_LIMIT.
ZIP64_LIMIT = 0x7FFFFFFF            # (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 0xFFFF        # (1 << 16) - 1
ZIP_MAX_COMMENT = 0xFFFF            # (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2, ZIP_LZMA = 0, 8, 12, 14
# Other ZIP compression methods not supported

# Format version numbers written into the create/extract version fields.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020068
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)
#
# Each record gets a struct format, its magic number, its packed size, and a
# set of index constants naming the positions in the unpacked tuple.

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Indices 0-7 address the unpacked structure.  The last two (_ECD_COMMENT and
# _ECD_LOCATION) are not part of the structure as defined in the spec, but
# they are used internally by this module as a convenience.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE,
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
161
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in fp.

    An OSError raised while probing the file object is treated as
    "not a ZIP file" and yields False.
    """
    try:
        found = bool(_EndRecData(fp))   # truthy result: correct magic number
    except OSError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000169
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too; anything
    with a "read" attribute is probed directly, everything else is opened
    in binary mode first.  Any OSError results in False.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except OSError:
        # Unreadable or missing file: keep whatever verdict we have so far.
        pass
    return verdict
185
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    offset is the position of the classic end-of-central-directory record
    relative to the end of the file (it is passed to seek() with whence=2).
    endrec is returned unchanged when no ZIP64 locator/record is found at
    the expected place; otherwise its first seven entries are overwritten
    with the values from the ZIP64 record.  Raises BadZipFile for archives
    that span multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    raw = fpin.read(sizeEndCentDir64Locator)
    if len(raw) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, raw)
    if magic != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks == 1):
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data', i.e. the ZIP64 record sits directly
    # in front of its locator.
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    raw = fpin.read(sizeEndCentDir64)
    if len(raw) != sizeEndCentDir64:
        return endrec
    (magic, _recsize, _create_version, _read_version,
     disk_num, disk_dir, entries_here, entries_total,
     dir_size, dir_offset) = struct.unpack(structEndArchive64, raw)
    if magic != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    replacements = (
        (_ECD_SIGNATURE, magic),
        (_ECD_DISK_NUMBER, disk_num),
        (_ECD_DISK_START, disk_dir),
        (_ECD_ENTRIES_THIS_DISK, entries_here),
        (_ECD_ENTRIES_TOTAL, entries_total),
        (_ECD_SIZE, dir_size),
        (_ECD_OFFSET, dir_offset),
    )
    for slot, value in replacements:
        endrec[slot] = value
    return endrec
227
228
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list: the eight unpacked fields of the ZIP "End of
    central dir" record, then the archive comment (bytes), then the file
    seek offset of the record.  The list is passed through _EndRecData64()
    so ZIP64 values replace the classic ones when present."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: with no archive comment the record is the very last thing
    # in the file.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    has_bare_record = (len(tail) == sizeEndCentDir
                       and tail[0:4] == stringEndArchive
                       and tail[-2:] == b"\000\000")
    if has_bare_record:
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, tail))

        # Append a blank comment and record start offset
        endrec += [b"", filesize - sizeEndCentDir]

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    window_start = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(window_start, 0)
    window = fpin.read()
    hit = window.rfind(stringEndArchive)
    if hit < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    record = window[hit:hit+sizeEndCentDir]
    if len(record) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, record))
    comment_len = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    comment_start = hit + sizeEndCentDir
    endrec.append(window[comment_start:comment_start+comment_len])
    endrec.append(window_start + hit)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, window_start + hit - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000288
Fred Drake484d7352000-10-02 21:14:52 +0000289
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive.

    Instances are created with default header values by __init__();
    header_offset, CRC, compress_size and file_size are expected to be
    filled in later (see the comment at the end of __init__).
    """

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Initialise the entry with a normalised name and default values.

        filename is cut at the first null byte and has os.sep replaced by
        "/".  date_time is a (year, month, day, hour, min, sec) sequence.

        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Return a compact description that omits attributes at their defaults."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The high 16 bits are shown as a file mode, the low 16 bits raw.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file (local) header as bytes.

        zip64 selects whether a ZIP64 extra field is appended: True forces
        it, False forbids it, and None (the default) enables it only when a
        size exceeds ZIP64_LIMIT.

        As a side effect extract_version and create_version are raised to
        the minimum version implied by ZIP64 use and the compression type.

        Raises LargeZipFile if a size exceeds ZIP64_LIMIT while zip64 is
        false.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # Extra field: header id 1, payload length, then both real sizes.
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        ASCII is used when possible; otherwise the name is encoded as UTF-8
        and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Decode the extra field, applying any ZIP64 (type 0x0001) values.

        file_size, compress_size and header_offset are replaced, in that
        order, by the 64-bit values of the ZIP64 field whenever they hold
        the 0xFFFFFFFF placeholder.  Other field types are skipped.

        Raises BadZipFile if a field is truncated, has an unexpected size,
        or carries fewer 64-bit values than the placeholders require.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        self.file_size = counts[idx]
                        idx += 1

                    if self.compress_size == 0xFFFFFFFF:
                        self.compress_size = counts[idx]
                        idx += 1

                    if self.header_offset == 0xffffffff:
                        self.header_offset = counts[idx]
                        idx += 1
                except IndexError:
                    # The field is too short for the placeholders present;
                    # report it as corrupt data instead of leaking IndexError.
                    raise BadZipFile(
                        "Corrupt zip64 extra field %04x (size=%d)" % (tp, ln)
                    ) from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).

        When strict_timestamps is false, modification times with a year
        before 1980 or after 2107 are clamped to 1980-01-01 00:00:00 and
        2107-12-31 23:59:59 respectively; otherwise the time is passed on
        unchanged.
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory.

        A member counts as a directory when its name ends with "/"; an empty
        name is not a directory.
        """
        # endswith() is safe on an empty name, unlike indexing with [-1].
        return self.filename.endswith('/')
512
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000513
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300514# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
515# internal keys. We noticed that a direct implementation is faster than
516# relying on binascii.crc32().
Thomas Wouterscf297e42007-02-23 15:07:44 +0000517
# Lookup table with 256 entries; built on first use by _ZipDecrypter().
_crctable = None
def _gen_crc(crc):
    """Return the CRC-32 table entry for the byte value crc.

    Runs eight shift steps, XOR-ing in the polynomial 0xEDB88320 each time
    the bit shifted out is set.
    """
    for _ in range(8):
        low_bit = crc & 1
        crc >>= 1
        if low_bit:
            crc ^= 0xEDB88320
    return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000526
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300527# ZIP supports a password-based form of encryption. Even though known
528# plaintext attacks have been found against it, it is still useful
529# to be able to get data out of such a file.
530#
531# Usage:
532# zd = _ZipDecrypter(mypwd)
533# plain_bytes = zd(cypher_bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000534
def _ZipDecrypter(pwd):
    """Return a function that decrypts bytes using the password pwd.

    pwd is iterated as a sequence of integers (e.g. a bytes object) to seed
    three 32-bit keys.  The returned callable keeps updating those keys, so
    it must be fed the encrypted data in order.
    """
    global _crctable
    if _crctable is None:
        # Build the shared CRC table once, on first use.
        _crctable = [_gen_crc(value) for value in range(256)]
    table = _crctable

    # Initial key values.
    k0, k1, k2 = 0x12345678, 0x23456789, 0x34567890

    def crc_step(byte, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ table[(crc ^ byte) & 0xFF]

    def absorb(byte):
        """Mix one plaintext byte into the three keys."""
        nonlocal k0, k1, k2
        k0 = crc_step(byte, k0)
        k1 = (k1 + (k0 & 0xFF)) & 0xFFFFFFFF
        k1 = (k1 * 134775813 + 1) & 0xFFFFFFFF
        k2 = crc_step(k1 >> 24, k2)

    for pwd_byte in pwd:
        absorb(pwd_byte)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        for byte in data:
            mask = k2 | 2
            byte ^= ((mask * (mask ^ 1)) >> 8) & 0xFF
            absorb(byte)
            plain.append(byte)
        return bytes(plain)

    return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000571
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200572
class LZMACompressor:
    """Raw LZMA1 compressor whose output starts with a small properties header.

    The underlying lzma compressor is created lazily; the first call to
    compress() or flush() prepends the header returned by _init().
    """

    def __init__(self):
        self._comp = None   # created by _init() on first use

    def _init(self):
        """Create the raw compressor and return the header bytes.

        The header is struct '<BBH' packed as (9, 4, len(props)) followed by
        the encoded LZMA1 filter properties.
        """
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        header = struct.pack('<BBH', 9, 4, len(props))
        return header + props

    def compress(self, data):
        """Compress data, emitting the header first if not yet written."""
        prefix = self._init() if self._comp is None else b''
        return prefix + self._comp.compress(data)

    def flush(self):
        """Finish the stream, emitting the header first if not yet written."""
        prefix = self._init() if self._comp is None else b''
        return prefix + self._comp.flush()
594
595
class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by a properties header.

    Input is buffered until the 4-byte header, the filter properties it
    announces, and at least one further byte are available; only then is
    the underlying lzma decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Return decompressed bytes for data (b'' while still buffering)."""
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            # Bytes 2-3 of the header hold the length of the properties.
            (psize,) = struct.unpack('<H', pending[2:4])
            payload_start = 4 + psize
            if len(pending) <= payload_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:payload_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[payload_start:]
            # The buffer is no longer needed once the decompressor exists.
            del self._unconsumed

        output = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return output
622
623
# Human-readable names keyed by ZIP compression method number; used for
# repr() output and "not supported" error messages.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce', 3: 'reduce', 4: 'reduce', 5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
643
def _check_compression(compression):
    """Validate that the compression method can be used.

    Returns None when the method is ZIP_STORED or its backing module was
    imported successfully.  Raises RuntimeError when the required module is
    missing and NotImplementedError for any other method.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if zlib:
            return
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if compression == ZIP_BZIP2:
        if bz2:
            return
        raise RuntimeError(
            "Compression requires the (missing) bz2 module")
    if compression == ZIP_LZMA:
        if lzma:
            return
        raise RuntimeError(
            "Compression requires the (missing) lzma module")
    raise NotImplementedError("That compression method is not supported")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200661
662
def _get_compressor(compress_type, compresslevel=None):
    """Return a new compressor object for compress_type, or None.

    compresslevel, when not None, is passed to the zlib or bz2 compressor;
    it is ignored for ZIP_LZMA.  None is returned for every other method.
    """
    if compress_type == ZIP_DEFLATED:
        if compresslevel is None:
            level = zlib.Z_DEFAULT_COMPRESSION
        else:
            level = compresslevel
        # wbits=-15 is used by both the default and explicit-level cases.
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()
    return None
677
678
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    Returns None for ZIP_STORED.  Raises NotImplementedError, naming the
    method when it is listed in ``compressor_names``, for any method other
    than ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2 and ZIP_LZMA.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # wbits=-15: raw deflate stream, no zlib header.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    method_name = compressor_names.get(compress_type)
    if method_name:
        raise NotImplementedError(
            "compression type %d (%s)" % (compress_type, method_name))
    raise NotImplementedError("compression type %d" % (compress_type,))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200694
695
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200696class _SharedFile:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300697 def __init__(self, file, pos, close, lock, writing):
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200698 self._file = file
699 self._pos = pos
700 self._close = close
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200701 self._lock = lock
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300702 self._writing = writing
John Jolly066df4f2018-01-30 01:51:35 -0700703 self.seekable = file.seekable
704 self.tell = file.tell
705
706 def seek(self, offset, whence=0):
707 with self._lock:
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200708 if self._writing():
John Jolly066df4f2018-01-30 01:51:35 -0700709 raise ValueError("Can't reposition in the ZIP file while "
710 "there is an open writing handle on it. "
711 "Close the writing handle before trying to read.")
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200712 self._file.seek(offset, whence)
John Jolly066df4f2018-01-30 01:51:35 -0700713 self._pos = self._file.tell()
714 return self._pos
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200715
716 def read(self, n=-1):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200717 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300718 if self._writing():
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300719 raise ValueError("Can't read from the ZIP file while there "
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300720 "is an open writing handle on it. "
721 "Close the writing handle before trying to read.")
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200722 self._file.seek(self._pos)
723 data = self._file.read(n)
724 self._pos = self._file.tell()
725 return data
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200726
727 def close(self):
728 if self._file is not None:
729 fileobj = self._file
730 self._file = None
731 self._close(fileobj)
732
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200733# Provide the tell method for unseekable stream
734class _Tellable:
735 def __init__(self, fp):
736 self.fp = fp
737 self.offset = 0
738
739 def write(self, data):
740 n = self.fp.write(data)
741 self.offset += n
742 return n
743
744 def tell(self):
745 return self.offset
746
747 def flush(self):
748 self.fp.flush()
749
750 def close(self):
751 self.fp.close()
752
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200753
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.

    Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  Parenthesised on purpose:
    # ``1 << 31 - 1`` would parse as ``1 << 30``.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Wrap *fileobj*, positioned at the start of the member's data.

        fileobj:       file object to read the (possibly compressed) data from.
        mode:          stored as the ``mode`` attribute.
        zipinfo:       supplies compress_type, compress_size, file_size,
                       filename and (optionally) CRC.
        decrypter:     optional callable applied to every chunk read.
        close_fileobj: if true, close() also closes *fileobj*.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        # Always initialise the running CRC so that the seek bookkeeping
        # below can snapshot it; it is only *checked* when a reference
        # value is available.
        self._running_crc = crc32(b'')
        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
        else:
            self._expected_crc = None

        self._seekable = False
        try:
            if fileobj.seekable():
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            # fileobj does not provide seekable()/tell(): stay unseekable.
            pass

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Push the bytes just read back in front of the buffer.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than asked for: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n=-1):
        """Read up to n bytes with at most one read() system call.

        If n is omitted, None, or negative, return the buffered bytes plus
        the result of at most one non-empty underlying read.
        """

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            # Loop only to skip reads that decompress to nothing.
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read (and decrypt) at least MIN_READ_SIZE raw bytes, but never past
        # the end of the member's compressed data.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            # The archive ended before the declared compressed size.
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        return self._seekable

    def seek(self, offset, whence=0):
        """Move to a new position in the uncompressed data and return it.

        The target is clamped to [0, file size].  Seeking backwards past
        the buffered data restarts decompression from the beginning of the
        member.  Raises io.UnsupportedOperation if the underlying stream is
        not seekable and ValueError for an unknown *whence*.
        """
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            # NOTE(review): the decrypter is not re-initialised here and
            # _compress_left is restored without the 12-byte encryption
            # header adjustment made in __init__, so a backward seek on an
            # encrypted member presumably cannot work - confirm.
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos

        # Advance by reading (and discarding) in bounded chunks.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        """Return the current position in the uncompressed data."""
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos
1050
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001051
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001052class _ZipWriteFile(io.BufferedIOBase):
1053 def __init__(self, zf, zinfo, zip64):
1054 self._zinfo = zinfo
1055 self._zip64 = zip64
1056 self._zipfile = zf
Bo Baylesce237c72018-01-29 23:54:07 -06001057 self._compressor = _get_compressor(zinfo.compress_type,
1058 zinfo._compresslevel)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001059 self._file_size = 0
1060 self._compress_size = 0
1061 self._crc = 0
1062
1063 @property
1064 def _fileobj(self):
1065 return self._zipfile.fp
1066
1067 def writable(self):
1068 return True
1069
1070 def write(self, data):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001071 if self.closed:
1072 raise ValueError('I/O operation on closed file.')
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001073 nbytes = len(data)
1074 self._file_size += nbytes
1075 self._crc = crc32(data, self._crc)
1076 if self._compressor:
1077 data = self._compressor.compress(data)
1078 self._compress_size += len(data)
1079 self._fileobj.write(data)
1080 return nbytes
1081
1082 def close(self):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001083 if self.closed:
1084 return
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001085 super().close()
1086 # Flush any data from the compressor, and update header info
1087 if self._compressor:
1088 buf = self._compressor.flush()
1089 self._compress_size += len(buf)
1090 self._fileobj.write(buf)
1091 self._zinfo.compress_size = self._compress_size
1092 else:
1093 self._zinfo.compress_size = self._file_size
1094 self._zinfo.CRC = self._crc
1095 self._zinfo.file_size = self._file_size
1096
1097 # Write updated header info
1098 if self._zinfo.flag_bits & 0x08:
1099 # Write CRC and file sizes after the file data
1100 fmt = '<LQQ' if self._zip64 else '<LLL'
1101 self._fileobj.write(struct.pack(fmt, self._zinfo.CRC,
1102 self._zinfo.compress_size, self._zinfo.file_size))
1103 self._zipfile.start_dir = self._fileobj.tell()
1104 else:
1105 if not self._zip64:
1106 if self._file_size > ZIP64_LIMIT:
1107 raise RuntimeError('File size unexpectedly exceeded ZIP64 '
1108 'limit')
1109 if self._compress_size > ZIP64_LIMIT:
1110 raise RuntimeError('Compressed size unexpectedly exceeded '
1111 'ZIP64 limit')
1112 # Seek backwards and write file header (which will now include
1113 # correct CRC and file sizes)
1114
1115 # Preserve current position in file
1116 self._zipfile.start_dir = self._fileobj.tell()
1117 self._fileobj.seek(self._zinfo.header_offset)
1118 self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1119 self._fileobj.seek(self._zipfile.start_dir)
1120
1121 self._zipfile._writing = False
1122
1123 # Successfully written: Add file to our caches
1124 self._zipfile.filelist.append(self._zinfo)
1125 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1126
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001127class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001128 """ Class with methods to open, read, write, close, list zip files.
1129
Bo Baylesce237c72018-01-29 23:54:07 -06001130 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1131 compresslevel=None)
Tim Petersa19a1682001-03-29 04:36:09 +00001132
Fred Drake3d9091e2001-03-26 15:49:24 +00001133 file: Either the path to the file, or a file-like object.
1134 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001135 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1136 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001137 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1138 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001139 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1140 needed, otherwise it will raise an exception when this would
1141 be necessary.
Bo Baylesce237c72018-01-29 23:54:07 -06001142 compresslevel: None (default for the given compression type) or an integer
1143 specifying the level to pass to the compressor.
1144 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1145 When using ZIP_DEFLATED integers 0 through 9 are accepted.
1146 When using ZIP_BZIP2 integers 1 through 9 are accepted.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001147
Fred Drake3d9091e2001-03-26 15:49:24 +00001148 """
Fred Drake484d7352000-10-02 21:14:52 +00001149
Fred Drake90eac282001-02-28 05:29:34 +00001150 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001151 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001152
Bo Baylesce237c72018-01-29 23:54:07 -06001153 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
Marcel Plch77b112c2018-08-31 16:43:31 +02001154 compresslevel=None, *, strict_timestamps=True):
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001155 """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
1156 or append 'a'."""
1157 if mode not in ('r', 'w', 'x', 'a'):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001158 raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001159
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001160 _check_compression(compression)
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001161
1162 self._allowZip64 = allowZip64
1163 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +00001164 self.debug = 0 # Level of printing: 0 through 3
1165 self.NameToInfo = {} # Find file info given name
1166 self.filelist = [] # List of ZipInfo instances for archive
1167 self.compression = compression # Method of compression
Bo Baylesce237c72018-01-29 23:54:07 -06001168 self.compresslevel = compresslevel
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001169 self.mode = mode
Thomas Wouterscf297e42007-02-23 15:07:44 +00001170 self.pwd = None
R David Murrayf50b38a2012-04-12 18:44:58 -04001171 self._comment = b''
Marcel Plch77b112c2018-08-31 16:43:31 +02001172 self._strict_timestamps = strict_timestamps
Tim Petersa19a1682001-03-29 04:36:09 +00001173
Fred Drake3d9091e2001-03-26 15:49:24 +00001174 # Check if we were passed a file-like object
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001175 if isinstance(file, os.PathLike):
1176 file = os.fspath(file)
Guido van Rossum3172c5d2007-10-16 18:12:55 +00001177 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001178 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +00001179 self._filePassed = 0
1180 self.filename = file
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001181 modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
1182 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001183 filemode = modeDict[mode]
1184 while True:
1185 try:
1186 self.fp = io.open(file, filemode)
1187 except OSError:
1188 if filemode in modeDict:
1189 filemode = modeDict[filemode]
1190 continue
Thomas Wouterscf297e42007-02-23 15:07:44 +00001191 raise
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001192 break
Fred Drake3d9091e2001-03-26 15:49:24 +00001193 else:
1194 self._filePassed = 1
1195 self.fp = file
1196 self.filename = getattr(file, 'name', None)
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001197 self._fileRefCnt = 1
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001198 self._lock = threading.RLock()
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001199 self._seekable = True
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001200 self._writing = False
Tim Petersa19a1682001-03-29 04:36:09 +00001201
Antoine Pitrou17babc52012-11-17 23:50:08 +01001202 try:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001203 if mode == 'r':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001204 self._RealGetContents()
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001205 elif mode in ('w', 'x'):
Georg Brandl268e4d42010-10-14 06:59:45 +00001206 # set the modified flag so central directory gets written
1207 # even if no files are added to the archive
1208 self._didModify = True
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001209 try:
Serhiy Storchaka34cba332017-01-01 19:00:30 +02001210 self.start_dir = self.fp.tell()
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001211 except (AttributeError, OSError):
1212 self.fp = _Tellable(self.fp)
Serhiy Storchaka34cba332017-01-01 19:00:30 +02001213 self.start_dir = 0
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001214 self._seekable = False
1215 else:
1216 # Some file-like objects can provide tell() but not seek()
1217 try:
1218 self.fp.seek(self.start_dir)
1219 except (AttributeError, OSError):
1220 self._seekable = False
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001221 elif mode == 'a':
Antoine Pitrou17babc52012-11-17 23:50:08 +01001222 try:
1223 # See if file is a zip file
1224 self._RealGetContents()
1225 # seek to start of directory and overwrite
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001226 self.fp.seek(self.start_dir)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001227 except BadZipFile:
1228 # file is not a zip file, just append
1229 self.fp.seek(0, 2)
1230
1231 # set the modified flag so central directory gets written
1232 # even if no files are added to the archive
1233 self._didModify = True
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001234 self.start_dir = self.fp.tell()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001235 else:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001236 raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
Antoine Pitrou17babc52012-11-17 23:50:08 +01001237 except:
1238 fp = self.fp
1239 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001240 self._fpclose(fp)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001241 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001242
Ezio Melottifaa6b7f2009-12-30 12:34:59 +00001243 def __enter__(self):
1244 return self
1245
1246 def __exit__(self, type, value, traceback):
1247 self.close()
1248
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001249 def __repr__(self):
1250 result = ['<%s.%s' % (self.__class__.__module__,
1251 self.__class__.__qualname__)]
1252 if self.fp is not None:
1253 if self._filePassed:
1254 result.append(' file=%r' % self.fp)
1255 elif self.filename is not None:
1256 result.append(' filename=%r' % self.filename)
1257 result.append(' mode=%r' % self.mode)
1258 else:
1259 result.append(' [closed]')
1260 result.append('>')
1261 return ''.join(result)
1262
Tim Peters7d3bad62001-04-04 18:56:49 +00001263 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +00001264 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001265 fp = self.fp
Georg Brandl268e4d42010-10-14 06:59:45 +00001266 try:
1267 endrec = _EndRecData(fp)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001268 except OSError:
Georg Brandl4d540882010-10-28 06:42:33 +00001269 raise BadZipFile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001270 if not endrec:
Georg Brandl4d540882010-10-28 06:42:33 +00001271 raise BadZipFile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001272 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001273 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001274 size_cd = endrec[_ECD_SIZE] # bytes in central directory
1275 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
R David Murrayf50b38a2012-04-12 18:44:58 -04001276 self._comment = endrec[_ECD_COMMENT] # archive comment
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001277
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001278 # "concat" is zero, unless zip was concatenated to another file
1279 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +00001280 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1281 # If Zip64 extension structures are present, account for them
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001282 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001283
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001284 if self.debug > 2:
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001285 inferred = concat + offset_cd
1286 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001287 # self.start_dir: Position of start of central directory
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001288 self.start_dir = offset_cd + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001289 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001290 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001291 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001292 total = 0
1293 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001294 centdir = fp.read(sizeCentralDir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001295 if len(centdir) != sizeCentralDir:
1296 raise BadZipFile("Truncated central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001297 centdir = struct.unpack(structCentralDir, centdir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001298 if centdir[_CD_SIGNATURE] != stringCentralDir:
1299 raise BadZipFile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001300 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001301 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +00001302 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001303 flags = centdir[5]
1304 if flags & 0x800:
1305 # UTF-8 file names extension
1306 filename = filename.decode('utf-8')
1307 else:
1308 # Historical ZIP filename encoding
1309 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001310 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001311 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +00001312 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1313 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001314 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001315 (x.create_version, x.create_system, x.extract_version, x.reserved,
Christian Tismer59202e52013-10-21 03:59:23 +02001316 x.flag_bits, x.compress_type, t, d,
1317 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
Martin v. Löwisd099b562012-05-01 14:08:22 +02001318 if x.extract_version > MAX_EXTRACT_VERSION:
1319 raise NotImplementedError("zip file version %.1f" %
1320 (x.extract_version / 10))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001321 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1322 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +00001323 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001324 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Christian Tismer59202e52013-10-21 03:59:23 +02001325 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001326
1327 x._decodeExtra()
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001328 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001329 self.filelist.append(x)
1330 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001331
1332 # update total bytes read from central directory
1333 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1334 + centdir[_CD_EXTRA_FIELD_LENGTH]
1335 + centdir[_CD_COMMENT_LENGTH])
1336
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001337 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001338 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001339
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001340
def namelist(self):
    """Return the file names of all archive members, in filelist order."""
    return [member.filename for member in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001344
def infolist(self):
    """Return the list of ZipInfo instances for the archive's members.

    The internal list object itself is returned, not a copy.
    """
    members = self.filelist
    return members
1349
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header row is printed, followed by one row per member giving its
    name, modification time and uncompressed size.  Output goes to 'file'
    (passed straight through to print()).
    """
    heading = ("File Name", "Modified ", "Size")
    print("%-46s %19s %12s" % heading, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001358
1359 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001360 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001361 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001362 for zinfo in self.filelist:
1363 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001364 # Read by chunks, to avoid an OverflowError or a
1365 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001366 with self.open(zinfo.filename, "r") as f:
1367 while f.read(chunk_size): # Check CRC-32
1368 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001369 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001370 return zinfo.filename
1371
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError if the archive has no member with that name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001380
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A falsy 'pwd' (None, b"", ...) clears the default.  Any other value
    must be a bytes instance, otherwise TypeError is raised.
    """
    if not pwd:
        self.pwd = None
        return
    if not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd
Thomas Wouterscf297e42007-02-23 15:07:44 +00001389
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Only bytes are accepted as an archive comment.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
    # An over-long comment is cut down to the maximum length, with a warning.
    limit = ZIP_MAX_COMMENT
    if len(comment) > limit:
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % limit, stacklevel=2)
        comment = comment[:limit]
    self._comment = comment
    # Record that the archive was changed (close() consults this flag).
    self._didModify = True
1407
def read(self, name, pwd=None):
    """Return the full contents read from the member 'name'.

    'name' and 'pwd' are forwarded to open() in mode "r".
    """
    with self.open(name, "r", pwd) as member_file:
        contents = member_file.read()
    return contents
Guido van Rossumd8faa362007-04-27 19:54:29 +00001412
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Return file-like object for 'name'.

    name is a string for the file name within the ZIP file, or a ZipInfo
    object.

    mode should be 'r' to read a file already in the ZIP file, or 'w' to
    write to a file newly added to the archive.

    pwd is the password to decrypt files (only used for reading).

    When writing, if the file size is not known in advance but may exceed
    2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
    files.  If the size is known in advance, it is best to pass a ZipInfo
    instance for name, with zinfo.file_size set.

    Raises ValueError for an unsupported mode, for a password given with
    mode 'w', for a closed archive, or when reading while a write handle
    is open; TypeError if pwd is not bytes; KeyError (from getinfo) if the
    name is unknown; BadZipFile if the local file header is truncated, has
    a bad signature or names a different file; NotImplementedError for
    flag bits 5 or 6; RuntimeError if a password is missing or wrong.
    """
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    if pwd and (mode == "w"):
        raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    # Make sure we have an info object
    if isinstance(name, ZipInfo):
        # 'name' is already an info object
        zinfo = name
    elif mode == 'w':
        # New member: inherit the archive-wide compression settings.
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
    else:
        # Get info object for name
        zinfo = self.getinfo(name)

    if mode == 'w':
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                "is an open writing handle on it. "
                "Close the writing handle before trying to read.")

    # Open for reading:
    # Take a reference on the underlying file; zef_file.close() hands it
    # back through self._fpclose.
    self._fileRefCnt += 1
    zef_file = _SharedFile(self.fp, zinfo.header_offset,
                           self._fpclose, self._lock, lambda: self._writing)
    try:
        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, fheader)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            # The extra field is read only to move past it.
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            # Otherwise the name is decoded as cp437
            fname_str = fname.decode("cp437")

        # The name stored in the local header must match the directory entry.
        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            # An explicit pwd argument wins; fall back to the default
            # set through setpassword().
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = zd(header[0:12])
            if zinfo.flag_bits & 0x8:
                # flag bit 3 set: compare against the high byte of the
                # raw DOS time
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the high byte of the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file %r" % name)

        return ZipExtFile(zef_file, mode, zinfo, zd, True)
    except:
        # Any failure (of whatever kind) must release the shared file
        # before the exception is re-raised unchanged.
        zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001526
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001527 def _open_to_write(self, zinfo, force_zip64=False):
1528 if force_zip64 and not self._allowZip64:
1529 raise ValueError(
1530 "force_zip64 is True, but allowZip64 was False when opening "
1531 "the ZIP file."
1532 )
1533 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001534 raise ValueError("Can't write to the ZIP file while there is "
1535 "another write handle open on it. "
1536 "Close the first handle before opening another.")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001537
1538 # Sizes and CRC are overwritten with correct data after processing the file
1539 if not hasattr(zinfo, 'file_size'):
1540 zinfo.file_size = 0
1541 zinfo.compress_size = 0
1542 zinfo.CRC = 0
1543
1544 zinfo.flag_bits = 0x00
1545 if zinfo.compress_type == ZIP_LZMA:
1546 # Compressed data includes an end-of-stream (EOS) marker
1547 zinfo.flag_bits |= 0x02
1548 if not self._seekable:
1549 zinfo.flag_bits |= 0x08
1550
1551 if not zinfo.external_attr:
1552 zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
1553
1554 # Compressed size can be larger than uncompressed size
1555 zip64 = self._allowZip64 and \
1556 (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1557
1558 if self._seekable:
1559 self.fp.seek(self.start_dir)
1560 zinfo.header_offset = self.fp.tell()
1561
1562 self._writecheck(zinfo)
1563 self._didModify = True
1564
1565 self.fp.write(zinfo.FileHeader(zip64))
1566
1567 self._writing = True
1568 return _ZipWriteFile(self, zinfo, zip64)
1569
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
    using its full name. Its file information is extracted as accurately
    as possible. `member' may be a filename or a ZipInfo object. You can
    specify a different directory using `path'.

    Returns whatever _extract_member() returns for the member.
    """
    destination = os.getcwd() if path is None else os.fspath(path)
    return self._extract_member(member, destination, pwd)
1582
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
    directory. `path' specifies a different directory to extract to.
    `members' is optional and must be a subset of the list returned
    by namelist().
    """
    wanted = self.namelist() if members is None else members
    destination = os.getcwd() if path is None else os.fspath(path)

    for item in wanted:
        self._extract_member(item, destination, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001599
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001600 @classmethod
1601 def _sanitize_windows_name(cls, arcname, pathsep):
1602 """Replace bad characters and remove trailing dots from parts."""
1603 table = cls._windows_illegal_name_trans_table
1604 if not table:
1605 illegal = ':<>|"?*'
1606 table = str.maketrans(illegal, '_' * len(illegal))
1607 cls._windows_illegal_name_trans_table = table
1608 arcname = arcname.translate(table)
1609 # remove trailing dots
1610 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1611 # rejoin, removing empty parts.
1612 arcname = pathsep.join(x for x in arcname if x)
1613 return arcname
1614
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
    file on the path targetpath.

    'member' may also be a member name, which is resolved with getinfo().
    The archive name is made relative and stripped of drive, empty, "."
    and ".." components before being joined onto targetpath.  Returns the
    normalized path of the file or directory that was written.
    """
    if not isinstance(member, ZipInfo):
        member = self.getinfo(member)

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    invalid_path_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                               if x not in invalid_path_parts)
    if os.path.sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, os.path.sep)

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        # exist_ok: another thread/process may create the directories
        # between the exists() check and this call.
        os.makedirs(upperdirs, exist_ok=True)

    if member.is_dir():
        if not os.path.isdir(targetpath):
            try:
                os.mkdir(targetpath)
            except FileExistsError:
                # Lost a creation race: fine as long as a directory is
                # there now; anything else is still an error.
                if not os.path.isdir(targetpath):
                    raise
        return targetpath

    with self.open(member, pwd=pwd) as source, \
         open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1656
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001657 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001658 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001659 if zinfo.filename in self.NameToInfo:
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001660 import warnings
1661 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001662 if self.mode not in ('w', 'x', 'a'):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001663 raise ValueError("write() requires mode 'w', 'x', or 'a'")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001664 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001665 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001666 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001667 _check_compression(zinfo.compress_type)
Serhiy Storchakacfbb3942014-09-23 21:34:24 +03001668 if not self._allowZip64:
1669 requires_zip64 = None
1670 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1671 requires_zip64 = "Files count"
1672 elif zinfo.file_size > ZIP64_LIMIT:
1673 requires_zip64 = "Filesize"
1674 elif zinfo.header_offset > ZIP64_LIMIT:
1675 requires_zip64 = "Zipfile size"
1676 if requires_zip64:
1677 raise LargeZipFile(requires_zip64 +
1678 " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001679
def write(self, filename, arcname=None,
          compress_type=None, compresslevel=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    compress_type and compresslevel override the archive defaults for
    this member when given.  A directory is stored as a header-only
    entry.  Raises ValueError if the archive is closed or a write handle
    is currently open.
    """
    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname,
                              strict_timestamps=self._strict_timestamps)

    if not zinfo.is_dir():
        # Regular file: choose compression settings, then stream the data
        # through a write handle.
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        if compresslevel is None:
            zinfo._compresslevel = self.compresslevel
        else:
            zinfo._compresslevel = compresslevel

        with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
            shutil.copyfileobj(src, dest, 1024*8)
        return

    # Directory: there is no data, only a local header.
    zinfo.compress_size = 0
    zinfo.CRC = 0
    with self._lock:
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        # The central directory will now start after this header.
        self.start_dir = self.fp.tell()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001728
def writestr(self, zinfo_or_arcname, data,
             compress_type=None, compresslevel=None):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    compress_type and compresslevel, when given, override the settings
    on the ZipInfo (or the archive defaults used for a plain name).
    Raises ValueError if the archive is closed or a write handle is
    currently open.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
        # endswith() instead of indexing [-1], so that an empty archive
        # name does not blow up with an IndexError here.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    if compresslevel is not None:
        zinfo._compresslevel = compresslevel

    zinfo.file_size = len(data)            # Uncompressed size
    with self._lock:
        with self.open(zinfo, mode='w') as dest:
            dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001769
1770 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001771 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001772 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001773
def close(self):
    """Close the file, and for mode 'w', 'x' and 'a' write the ending
    records.

    Does nothing if the archive is already closed.  Raises ValueError
    (leaving the archive open) while a write handle is still open.
    """
    if self.fp is None:
        return

    if self._writing:
        raise ValueError("Can't close the ZIP file while there is "
                         "an open writing handle on it. "
                         "Close the writing handle before closing the zip.")

    try:
        needs_end_record = self.mode in ('w', 'x', 'a') and self._didModify
        if needs_end_record:
            # write ending records
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        # Release the file even if writing the ending records failed.
        handle = self.fp
        self.fp = None
        self._fpclose(handle)
1795
def _write_end_record(self):
    """Write the central directory and the end-of-archive records to self.fp.

    One central directory entry is written per member of self.filelist,
    followed by the ZIP64 end-of-archive record and locator when a limit
    is exceeded, then the regular end-of-archive record and the archive
    comment.  The file is flushed at the end.

    Raises LargeZipFile if ZIP64 records are required but the archive
    was opened without allowZip64.
    """
    for zinfo in self.filelist:         # write central directory
        # Pack the date_time tuple into the 16-bit date and time codes.
        dt = zinfo.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        # Values too large for their 32-bit slots are collected here and
        # replaced in the fixed record by the 0xffffffff marker.
        extra = []
        if zinfo.file_size > ZIP64_LIMIT \
           or zinfo.compress_size > ZIP64_LIMIT:
            extra.append(zinfo.file_size)
            extra.append(zinfo.compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
        else:
            file_size = zinfo.file_size
            compress_size = zinfo.compress_size

        if zinfo.header_offset > ZIP64_LIMIT:
            extra.append(zinfo.header_offset)
            header_offset = 0xffffffff
        else:
            header_offset = zinfo.header_offset

        extra_data = zinfo.extra
        min_version = 0
        if extra:
            # Put a ZIP64 field (header id 1, 8 bytes per value) in front
            # of the member's existing extra data
            extra_data = struct.pack(
                '<HH' + 'Q'*len(extra),
                1, 8*len(extra), *extra) + extra_data

            min_version = ZIP64_VERSION

        # bzip2 and LZMA members raise the minimum version as well.
        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        # Never lower the versions already recorded on the member.
        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
        except DeprecationWarning:
            # Debugging aid: dump the values that were being packed to
            # stderr, then let the exception propagate.
            print((structCentralDir, stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset), file=sys.stderr)
            raise
        self.fp.write(centdir)
        self.fp.write(filename)
        self.fp.write(extra_data)
        self.fp.write(zinfo.comment)

    # pos2 is the position just past the last central directory entry.
    pos2 = self.fp.tell()
    # Write end-of-zip-archive record
    centDirCount = len(self.filelist)
    centDirSize = pos2 - self.start_dir
    centDirOffset = self.start_dir
    requires_zip64 = None
    if centDirCount > ZIP_FILECOUNT_LIMIT:
        requires_zip64 = "Files count"
    elif centDirOffset > ZIP64_LIMIT:
        requires_zip64 = "Central directory offset"
    elif centDirSize > ZIP64_LIMIT:
        requires_zip64 = "Central directory size"
    if requires_zip64:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
        zip64endrec = struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, centDirCount, centDirCount,
            centDirSize, centDirOffset)
        self.fp.write(zip64endrec)

        # The locator stores pos2, the position at which the ZIP64 end
        # record was just written.
        zip64locrec = struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, pos2, 1)
        self.fp.write(zip64locrec)
        # Clamp the values for the regular end record to its field widths.
        centDirCount = min(centDirCount, 0xFFFF)
        centDirSize = min(centDirSize, 0xFFFFFFFF)
        centDirOffset = min(centDirOffset, 0xFFFFFFFF)

    endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, centDirCount, centDirCount,
                         centDirSize, centDirOffset, len(self._comment))
    self.fp.write(endrec)
    self.fp.write(self._comment)
    self.fp.flush()
1896
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001897 def _fpclose(self, fp):
1898 assert self._fileRefCnt > 0
1899 self._fileRefCnt -= 1
1900 if not self._fileRefCnt and not self._filePassed:
1901 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001902
1903
1904class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001905 """Class to create ZIP archives with Python library files and packages."""
1906
def __init__(self, file, mode="r", compression=ZIP_STORED,
             allowZip64=True, optimize=-1):
    """Open the archive as ZipFile does and remember 'optimize'.

    file, mode, compression and allowZip64 are handed to
    ZipFile.__init__ unchanged; optimize is stored as self._optimize.
    """
    ZipFile.__init__(
        self,
        file,
        mode=mode,
        compression=compression,
        allowZip64=allowZip64,
    )
    self._optimize = optimize
1912
Christian Tismer59202e52013-10-21 03:59:23 +02001913 def writepy(self, pathname, basename="", filterfunc=None):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001914 """Add all files from "pathname" to the ZIP archive.
1915
Fred Drake484d7352000-10-02 21:14:52 +00001916 If pathname is a package directory, search the directory and
1917 all package subdirectories recursively for all *.py and enter
1918 the modules into the archive. If pathname is a plain
1919 directory, listdir *.py and enter all modules. Else, pathname
1920 must be a Python *.py file and the module will be put into the
Brett Cannonf299abd2015-04-13 14:21:02 -04001921 archive. Added modules are always module.pyc.
Fred Drake484d7352000-10-02 21:14:52 +00001922 This method will compile the module.py into module.pyc if
1923 necessary.
Christian Tismer59202e52013-10-21 03:59:23 +02001924 If filterfunc(pathname) is given, it is called with every argument.
1925 When it is False, the file or directory is skipped.
Fred Drake484d7352000-10-02 21:14:52 +00001926 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001927 pathname = os.fspath(pathname)
Christian Tismer59202e52013-10-21 03:59:23 +02001928 if filterfunc and not filterfunc(pathname):
1929 if self.debug:
Christian Tismer410d9312013-10-22 04:09:28 +02001930 label = 'path' if os.path.isdir(pathname) else 'file'
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001931 print('%s %r skipped by filterfunc' % (label, pathname))
Christian Tismer59202e52013-10-21 03:59:23 +02001932 return
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001933 dir, name = os.path.split(pathname)
1934 if os.path.isdir(pathname):
1935 initname = os.path.join(pathname, "__init__.py")
1936 if os.path.isfile(initname):
1937 # This is a package directory, add it
1938 if basename:
1939 basename = "%s/%s" % (basename, name)
1940 else:
1941 basename = name
1942 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001943 print("Adding package in", pathname, "as", basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001944 fname, arcname = self._get_codename(initname[0:-3], basename)
1945 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001946 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001947 self.write(fname, arcname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01001948 dirlist = sorted(os.listdir(pathname))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001949 dirlist.remove("__init__.py")
1950 # Add all *.py files and package subdirectories
1951 for filename in dirlist:
1952 path = os.path.join(pathname, filename)
1953 root, ext = os.path.splitext(filename)
1954 if os.path.isdir(path):
1955 if os.path.isfile(os.path.join(path, "__init__.py")):
1956 # This is a package directory, add it
Christian Tismer59202e52013-10-21 03:59:23 +02001957 self.writepy(path, basename,
1958 filterfunc=filterfunc) # Recursive call
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001959 elif ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02001960 if filterfunc and not filterfunc(path):
1961 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001962 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02001963 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001964 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02001965 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001966 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001967 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001968 self.write(fname, arcname)
1969 else:
1970 # This is NOT a package directory, add its files at top level
1971 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001972 print("Adding files from directory", pathname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01001973 for filename in sorted(os.listdir(pathname)):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001974 path = os.path.join(pathname, filename)
1975 root, ext = os.path.splitext(filename)
1976 if ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02001977 if filterfunc and not filterfunc(path):
1978 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001979 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02001980 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001981 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02001982 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001983 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001984 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001985 self.write(fname, arcname)
1986 else:
1987 if pathname[-3:] != ".py":
Collin Winterce36ad82007-08-30 01:19:48 +00001988 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02001989 'Files added with writepy() must end with ".py"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001990 fname, arcname = self._get_codename(pathname[0:-3], basename)
1991 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001992 print("Adding file", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001993 self.write(fname, arcname)
1994
def _get_codename(self, pathname, basename):
    """Resolve a module path to (file to store, name inside the archive).

    pathname is the module's path without its ".py" suffix and basename
    is the archive directory prefix (may be empty).  Byte-code is
    preferred: an up-to-date cached file is reused, otherwise the source
    is compiled.  The archive member is always named "<module>.pyc",
    unless compilation fails, in which case the ".py" source itself is
    returned.  For example, given /python/lib/string, the result is
    (<path of the byte-code file>, "string.pyc").

    Raises ValueError when self._optimize is not -1, 0, 1 or 2.
    """
    def _build(source, level=-1):
        # py_compile is only needed when a cache file is missing or stale.
        import py_compile
        if self.debug:
            print("Compiling", source)
        try:
            py_compile.compile(source, doraise=True, optimize=level)
        except py_compile.PyCompileError as exc:
            print(exc.msg)
            return False
        else:
            return True

    source_py = pathname + ".py"
    legacy_pyc = pathname + ".pyc"
    # __pycache__ file names for optimization levels 0, 1 and 2, in order.
    cached = [importlib.util.cache_from_source(source_py, optimization=tag)
              for tag in ('', 1, 2)]

    def _is_current(candidate):
        # A byte-code file counts only if it exists and is at least as
        # new as the source it was compiled from.
        if not os.path.isfile(candidate):
            return False
        return os.stat(candidate).st_mtime >= os.stat(source_py).st_mtime

    if self._optimize == -1:
        # legacy mode: take the first usable byte-code file, trying the
        # sibling .pyc first and then each __pycache__ variant.
        for candidate in [legacy_pyc] + cached:
            if _is_current(candidate):
                fname, arcname = candidate, legacy_pyc
                break
        else:
            # Nothing usable on disk: compile into the PEP 3147 location
            # that matches the running interpreter's optimization level.
            if _build(source_py):
                level = sys.flags.optimize
                fname = cached[level] if level in (0, 1) else cached[2]
                arcname = legacy_pyc
            else:
                fname = arcname = source_py
    else:
        # new mode: only the cache file for the requested level is used.
        for level, candidate in enumerate(cached):
            if self._optimize == level:
                fname = candidate
                break
        else:
            msg = "invalid value for 'optimize': {!r}".format(self._optimize)
            raise ValueError(msg)
        arcname = legacy_pyc
        if not _is_current(fname):
            if not _build(source_py, level=self._optimize):
                fname = arcname = source_py

    # Only the final path component goes into the archive, below basename.
    member = os.path.basename(arcname)
    if basename:
        member = "%s/%s" % (basename, member)
    return (fname, member)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002076
2077
def main(args=None):
    """Command-line entry point for the zipfile module.

    args is the list of command-line arguments handed to argparse (None
    is passed through unchanged).  Exactly one action is required:

      -l <zipfile>               print a listing of the archive
      -e <zipfile> <output_dir>  extract the archive into output_dir
      -c <name> [<file> ...]     create archive <name> from the sources
      -t <zipfile>               test the archive's members
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for zipfile module.')
    modes = parser.add_mutually_exclusive_group(required=True)
    modes.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    modes.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    modes.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    modes.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    opts = parser.parse_args(args)

    if opts.test is not None:
        with ZipFile(opts.test, 'r') as archive:
            corrupt_member = archive.testzip()
        if corrupt_member:
            print("The following enclosed file is corrupted: {!r}".format(corrupt_member))
        print("Done testing")
        return

    if opts.list is not None:
        with ZipFile(opts.list, 'r') as archive:
            archive.printdir()
        return

    if opts.extract is not None:
        archive_path, target_dir = opts.extract
        with ZipFile(archive_path, 'r') as archive:
            archive.extractall(target_dir)
        return

    if opts.create is None:
        return

    # The first value names the archive; any remaining ones are sources.
    zip_name, *sources = opts.create

    def add_tree(archive, fs_path, arc_path):
        # Regular files are stored deflated; directories get their own
        # entry (when they have an archive name) and are then walked in
        # sorted order.  Anything else is ignored.
        if os.path.isfile(fs_path):
            archive.write(fs_path, arc_path, ZIP_DEFLATED)
            return
        if not os.path.isdir(fs_path):
            return
        if arc_path:
            archive.write(fs_path, arc_path)
        for entry in sorted(os.listdir(fs_path)):
            add_tree(archive,
                     os.path.join(fs_path, entry),
                     os.path.join(arc_path, entry))

    with ZipFile(zip_name, 'w') as archive:
        for source in sources:
            arc_root = os.path.basename(source)
            if not arc_root:
                # A trailing separator leaves an empty basename; fall
                # back to the name of the directory itself.
                arc_root = os.path.basename(os.path.dirname(source))
            if arc_root in ('', os.curdir, os.pardir):
                arc_root = ''
            add_tree(archive, source, arc_root)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002137
# Run the command-line interface when this module is executed as a script.
if __name__ == "__main__":
    main()