blob: 2757ce91cf485c9a2ed0a641d8124faa729d8e33 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Brett Cannonb57a0852013-06-15 17:32:30 -04008import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import sys
10import time
11import stat
12import shutil
13import struct
14import binascii
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020015import threading
Guido van Rossum32abe6f2000-03-31 17:30:02 +000016
17try:
Tim Peterse1190062001-01-15 03:34:38 +000018 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000019 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040020except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000021 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000022 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000023
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020024try:
25 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040026except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020027 bz2 = None
28
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020029try:
30 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040031except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020032 lzma = None
33
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020034__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020035 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Georg Brandl4d540882010-10-28 06:42:33 +000036 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000037
Georg Brandl4d540882010-10-28 06:42:33 +000038class BadZipFile(Exception):
Guido van Rossum32abe6f2000-03-31 17:30:02 +000039 pass
Thomas Wouters0e3f5912006-08-11 14:57:12 +000040
41
42class LargeZipFile(Exception):
43 """
44 Raised when writing a zipfile, the zipfile requires ZIP64 extensions
45 and those extensions are disabled.
46 """
47
Georg Brandl4d540882010-10-28 06:42:33 +000048error = BadZipfile = BadZipFile # Pre-3.2 compatibility names
49
Guido van Rossum32abe6f2000-03-31 17:30:02 +000050
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +000051ZIP64_LIMIT = (1 << 31) - 1
Serhiy Storchakacfbb3942014-09-23 21:34:24 +030052ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +000053ZIP_MAX_COMMENT = (1 << 16) - 1
Thomas Wouters0e3f5912006-08-11 14:57:12 +000054
Guido van Rossum32abe6f2000-03-31 17:30:02 +000055# constants for Zip file compression methods
56ZIP_STORED = 0
57ZIP_DEFLATED = 8
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020058ZIP_BZIP2 = 12
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020059ZIP_LZMA = 14
Guido van Rossum32abe6f2000-03-31 17:30:02 +000060# Other ZIP compression methods not supported
61
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020062DEFAULT_VERSION = 20
63ZIP64_VERSION = 45
64BZIP2_VERSION = 46
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020065LZMA_VERSION = 63
Martin v. Löwisd099b562012-05-01 14:08:22 +020066# we recognize (but not necessarily support) all features up to that version
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020067MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020068
Martin v. Löwisb09b8442008-07-03 14:13:42 +000069# Below are some formats and associated data for reading/writing headers using
70# the struct module. The names and structures of headers/records are those used
71# in the PKWARE description of the ZIP file format:
72# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
73# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000074
Martin v. Löwisb09b8442008-07-03 14:13:42 +000075# The "end of central directory" structure, magic number, size, and indices
76# (section V.I in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +000077structEndArchive = b"<4s4H2LH"
78stringEndArchive = b"PK\005\006"
79sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +000080
81_ECD_SIGNATURE = 0
82_ECD_DISK_NUMBER = 1
83_ECD_DISK_START = 2
84_ECD_ENTRIES_THIS_DISK = 3
85_ECD_ENTRIES_TOTAL = 4
86_ECD_SIZE = 5
87_ECD_OFFSET = 6
88_ECD_COMMENT_SIZE = 7
89# These last two indices are not part of the structure as defined in the
90# spec, but they are used internally by this module as a convenience
91_ECD_COMMENT = 8
92_ECD_LOCATION = 9
93
94# The "central directory" structure, magic number, size, and indices
95# of entries in the structure (section V.F in the format document)
96structCentralDir = "<4s4B4HL2L5H2L"
Georg Brandl2ee470f2008-07-16 12:55:28 +000097stringCentralDir = b"PK\001\002"
Martin v. Löwisb09b8442008-07-03 14:13:42 +000098sizeCentralDir = struct.calcsize(structCentralDir)
99
Fred Drake3e038e52001-02-28 17:56:26 +0000100# indexes of entries in the central directory structure
101_CD_SIGNATURE = 0
102_CD_CREATE_VERSION = 1
103_CD_CREATE_SYSTEM = 2
104_CD_EXTRACT_VERSION = 3
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000105_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +0000106_CD_FLAG_BITS = 5
107_CD_COMPRESS_TYPE = 6
108_CD_TIME = 7
109_CD_DATE = 8
110_CD_CRC = 9
111_CD_COMPRESSED_SIZE = 10
112_CD_UNCOMPRESSED_SIZE = 11
113_CD_FILENAME_LENGTH = 12
114_CD_EXTRA_FIELD_LENGTH = 13
115_CD_COMMENT_LENGTH = 14
116_CD_DISK_NUMBER_START = 15
117_CD_INTERNAL_FILE_ATTRIBUTES = 16
118_CD_EXTERNAL_FILE_ATTRIBUTES = 17
119_CD_LOCAL_HEADER_OFFSET = 18
120
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000121# The "local file header" structure, magic number, size, and indices
122# (section V.A in the format document)
123structFileHeader = "<4s2B4HL2L2H"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000124stringFileHeader = b"PK\003\004"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000125sizeFileHeader = struct.calcsize(structFileHeader)
126
Fred Drake3e038e52001-02-28 17:56:26 +0000127_FH_SIGNATURE = 0
128_FH_EXTRACT_VERSION = 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000129_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000130_FH_GENERAL_PURPOSE_FLAG_BITS = 3
131_FH_COMPRESSION_METHOD = 4
132_FH_LAST_MOD_TIME = 5
133_FH_LAST_MOD_DATE = 6
134_FH_CRC = 7
135_FH_COMPRESSED_SIZE = 8
136_FH_UNCOMPRESSED_SIZE = 9
137_FH_FILENAME_LENGTH = 10
138_FH_EXTRA_FIELD_LENGTH = 11
139
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000140# The "Zip64 end of central directory locator" structure, magic number, and size
Georg Brandl2ee470f2008-07-16 12:55:28 +0000141structEndArchive64Locator = "<4sLQL"
142stringEndArchive64Locator = b"PK\x06\x07"
143sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000144
145# The "Zip64 end of central directory" record, magic number, size, and indices
146# (section V.G in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000147structEndArchive64 = "<4sQ2H2L4Q"
148stringEndArchive64 = b"PK\x06\x06"
149sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000150
151_CD64_SIGNATURE = 0
152_CD64_DIRECTORY_RECSIZE = 1
153_CD64_CREATE_VERSION = 2
154_CD64_EXTRACT_VERSION = 3
155_CD64_DISK_NUMBER = 4
156_CD64_DISK_NUMBER_START = 5
157_CD64_NUMBER_ENTRIES_THIS_DISK = 6
158_CD64_NUMBER_ENTRIES_TOTAL = 7
159_CD64_DIRECTORY_SIZE = 8
160_CD64_OFFSET_START_CENTDIR = 9
161
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000162def _check_zipfile(fp):
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000163 try:
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000164 if _EndRecData(fp):
165 return True # file has correct magic number
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200166 except OSError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000167 pass
Guido van Rossum8ca162f2002-04-07 06:36:23 +0000168 return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000169
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000170def is_zipfile(filename):
171 """Quickly see if a file is a ZIP file by checking the magic number.
172
173 The filename argument may be a file or file-like object too.
174 """
175 result = False
176 try:
177 if hasattr(filename, "read"):
178 result = _check_zipfile(fp=filename)
179 else:
180 with open(filename, "rb") as fp:
181 result = _check_zipfile(fp)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200182 except OSError:
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000183 pass
184 return result
185
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000186def _EndRecData64(fpin, offset, endrec):
187 """
188 Read the ZIP64 end-of-archive records and use that to update endrec
189 """
Georg Brandl268e4d42010-10-14 06:59:45 +0000190 try:
191 fpin.seek(offset - sizeEndCentDir64Locator, 2)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200192 except OSError:
Georg Brandl268e4d42010-10-14 06:59:45 +0000193 # If the seek fails, the file is not large enough to contain a ZIP64
194 # end-of-archive record, so just return the end record we were given.
195 return endrec
196
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000197 data = fpin.read(sizeEndCentDir64Locator)
Serhiy Storchakad2b15272013-01-31 15:27:07 +0200198 if len(data) != sizeEndCentDir64Locator:
199 return endrec
Georg Brandl2ee470f2008-07-16 12:55:28 +0000200 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
201 if sig != stringEndArchive64Locator:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000202 return endrec
203
204 if diskno != 0 or disks != 1:
Éric Araujoae2d8322010-10-28 13:49:17 +0000205 raise BadZipFile("zipfiles that span multiple disks are not supported")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000206
207 # Assume no 'zip64 extensible data'
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000208 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
209 data = fpin.read(sizeEndCentDir64)
Serhiy Storchakad2b15272013-01-31 15:27:07 +0200210 if len(data) != sizeEndCentDir64:
211 return endrec
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000212 sig, sz, create_version, read_version, disk_num, disk_dir, \
Christian Tismer59202e52013-10-21 03:59:23 +0200213 dircount, dircount2, dirsize, diroffset = \
214 struct.unpack(structEndArchive64, data)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000215 if sig != stringEndArchive64:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000216 return endrec
217
218 # Update the original endrec using data from the ZIP64 record
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +0000219 endrec[_ECD_SIGNATURE] = sig
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000220 endrec[_ECD_DISK_NUMBER] = disk_num
221 endrec[_ECD_DISK_START] = disk_dir
222 endrec[_ECD_ENTRIES_THIS_DISK] = dircount
223 endrec[_ECD_ENTRIES_TOTAL] = dircount2
224 endrec[_ECD_SIZE] = dirsize
225 endrec[_ECD_OFFSET] = diroffset
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000226 return endrec
227
228
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000229def _EndRecData(fpin):
230 """Return data from the "End of Central Directory" record, or None.
231
232 The data is a list of the nine items in the ZIP "End of central dir"
233 record followed by a tenth item, the file seek offset of this record."""
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000234
235 # Determine file size
236 fpin.seek(0, 2)
237 filesize = fpin.tell()
238
239 # Check to see if this is ZIP file with no archive comment (the
240 # "end of central directory" structure should be the last item in the
241 # file if this is the case).
Amaury Forgeot d'Arcbc347802009-07-28 22:18:57 +0000242 try:
243 fpin.seek(-sizeEndCentDir, 2)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200244 except OSError:
Amaury Forgeot d'Arcbc347802009-07-28 22:18:57 +0000245 return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000246 data = fpin.read()
Serhiy Storchakad2b15272013-01-31 15:27:07 +0200247 if (len(data) == sizeEndCentDir and
248 data[0:4] == stringEndArchive and
249 data[-2:] == b"\000\000"):
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000250 # the signature is correct and there's no comment, unpack structure
Georg Brandl2ee470f2008-07-16 12:55:28 +0000251 endrec = struct.unpack(structEndArchive, data)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000252 endrec=list(endrec)
253
254 # Append a blank comment and record start offset
255 endrec.append(b"")
256 endrec.append(filesize - sizeEndCentDir)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000257
Amaury Forgeot d'Arcd3fb4bb2009-01-18 00:29:02 +0000258 # Try to read the "Zip64 end of central directory" structure
259 return _EndRecData64(fpin, -sizeEndCentDir, endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000260
261 # Either this is not a ZIP file, or it is a ZIP file with an archive
262 # comment. Search the end of the file for the "end of central directory"
263 # record signature. The comment is the last item in the ZIP file and may be
264 # up to 64K long. It is assumed that the "end of central directory" magic
265 # number does not appear in the comment.
266 maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
267 fpin.seek(maxCommentStart, 0)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000268 data = fpin.read()
Georg Brandl2ee470f2008-07-16 12:55:28 +0000269 start = data.rfind(stringEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000270 if start >= 0:
271 # found the magic number; attempt to unpack and interpret
272 recData = data[start:start+sizeEndCentDir]
Serhiy Storchakad2b15272013-01-31 15:27:07 +0200273 if len(recData) != sizeEndCentDir:
274 # Zip file is corrupted.
275 return None
Georg Brandl2ee470f2008-07-16 12:55:28 +0000276 endrec = list(struct.unpack(structEndArchive, recData))
R David Murray4fbb9db2011-06-09 15:50:51 -0400277 commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
278 comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
279 endrec.append(comment)
280 endrec.append(maxCommentStart + start)
Amaury Forgeot d'Arcd3fb4bb2009-01-18 00:29:02 +0000281
R David Murray4fbb9db2011-06-09 15:50:51 -0400282 # Try to read the "Zip64 end of central directory" structure
283 return _EndRecData64(fpin, maxCommentStart + start - filesize,
284 endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000285
286 # Unable to find a valid end of central directory structure
Serhiy Storchakad2b15272013-01-31 15:27:07 +0200287 return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000288
Fred Drake484d7352000-10-02 21:14:52 +0000289
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000290class ZipInfo (object):
Fred Drake484d7352000-10-02 21:14:52 +0000291 """Class with attributes describing each file in the ZIP archive."""
292
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000293 __slots__ = (
Christian Tismer59202e52013-10-21 03:59:23 +0200294 'orig_filename',
295 'filename',
296 'date_time',
297 'compress_type',
Bo Baylesce237c72018-01-29 23:54:07 -0600298 '_compresslevel',
Christian Tismer59202e52013-10-21 03:59:23 +0200299 'comment',
300 'extra',
301 'create_system',
302 'create_version',
303 'extract_version',
304 'reserved',
305 'flag_bits',
306 'volume',
307 'internal_attr',
308 'external_attr',
309 'header_offset',
310 'CRC',
311 'compress_size',
312 'file_size',
313 '_raw_time',
314 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000315
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000316 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
Greg Ward8e36d282003-06-18 00:53:06 +0000317 self.orig_filename = filename # Original file name in archive
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000318
319 # Terminate the file name at the first null byte. Null bytes in file
320 # names are used as tricks by viruses in archives.
Greg Ward8e36d282003-06-18 00:53:06 +0000321 null_byte = filename.find(chr(0))
322 if null_byte >= 0:
323 filename = filename[0:null_byte]
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000324 # This is used to ensure paths in generated ZIP files always use
325 # forward slashes as the directory separator, as required by the
326 # ZIP format specification.
327 if os.sep != "/" and os.sep in filename:
Greg Ward8e36d282003-06-18 00:53:06 +0000328 filename = filename.replace(os.sep, "/")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000329
Greg Ward8e36d282003-06-18 00:53:06 +0000330 self.filename = filename # Normalized file name
Tim Peterse1190062001-01-15 03:34:38 +0000331 self.date_time = date_time # year, month, day, hour, min, sec
Senthil Kumaran29fa9d42011-10-20 01:46:00 +0800332
333 if date_time[0] < 1980:
334 raise ValueError('ZIP does not support timestamps before 1980')
335
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000336 # Standard values:
Tim Peterse1190062001-01-15 03:34:38 +0000337 self.compress_type = ZIP_STORED # Type of compression for the file
Bo Baylesce237c72018-01-29 23:54:07 -0600338 self._compresslevel = None # Level for the compressor
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000339 self.comment = b"" # Comment for each file
340 self.extra = b"" # ZIP extra data
Martin v. Löwis00756902006-02-05 17:09:41 +0000341 if sys.platform == 'win32':
342 self.create_system = 0 # System which created ZIP archive
343 else:
344 # Assume everything else is unix-y
345 self.create_system = 3 # System which created ZIP archive
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200346 self.create_version = DEFAULT_VERSION # Version which created ZIP archive
347 self.extract_version = DEFAULT_VERSION # Version needed to extract archive
Tim Peterse1190062001-01-15 03:34:38 +0000348 self.reserved = 0 # Must be zero
349 self.flag_bits = 0 # ZIP flag bits
350 self.volume = 0 # Volume number of file header
351 self.internal_attr = 0 # Internal attributes
352 self.external_attr = 0 # External file attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000353 # Other attributes are set by class ZipFile:
Tim Peterse1190062001-01-15 03:34:38 +0000354 # header_offset Byte offset to the file header
Tim Peterse1190062001-01-15 03:34:38 +0000355 # CRC CRC-32 of the uncompressed file
356 # compress_size Size of the compressed file
357 # file_size Size of the uncompressed file
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000358
Serhiy Storchaka51a43702014-10-29 22:42:06 +0200359 def __repr__(self):
360 result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
361 if self.compress_type != ZIP_STORED:
362 result.append(' compress_type=%s' %
363 compressor_names.get(self.compress_type,
364 self.compress_type))
365 hi = self.external_attr >> 16
366 lo = self.external_attr & 0xFFFF
367 if hi:
368 result.append(' filemode=%r' % stat.filemode(hi))
369 if lo:
370 result.append(' external_attr=%#x' % lo)
Serhiy Storchaka503f9082016-02-08 00:02:25 +0200371 isdir = self.is_dir()
Serhiy Storchaka51a43702014-10-29 22:42:06 +0200372 if not isdir or self.file_size:
373 result.append(' file_size=%r' % self.file_size)
374 if ((not isdir or self.compress_size) and
375 (self.compress_type != ZIP_STORED or
376 self.file_size != self.compress_size)):
377 result.append(' compress_size=%r' % self.compress_size)
378 result.append('>')
379 return ''.join(result)
380
Serhiy Storchaka182d7cd2013-01-15 00:31:39 +0200381 def FileHeader(self, zip64=None):
Fred Drake484d7352000-10-02 21:14:52 +0000382 """Return the per-file header as a string."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000383 dt = self.date_time
384 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +0000385 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000386 if self.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +0000387 # Set these to zero because we write them after the file data
388 CRC = compress_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000389 else:
Tim Peterse1190062001-01-15 03:34:38 +0000390 CRC = self.CRC
391 compress_size = self.compress_size
392 file_size = self.file_size
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000393
394 extra = self.extra
395
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200396 min_version = 0
Serhiy Storchaka182d7cd2013-01-15 00:31:39 +0200397 if zip64 is None:
398 zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
399 if zip64:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000400 fmt = '<HHQQ'
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000401 extra = extra + struct.pack(fmt,
Christian Tismer59202e52013-10-21 03:59:23 +0200402 1, struct.calcsize(fmt)-4, file_size, compress_size)
Serhiy Storchaka182d7cd2013-01-15 00:31:39 +0200403 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
404 if not zip64:
405 raise LargeZipFile("Filesize would require ZIP64 extensions")
406 # File is larger than what fits into a 4 byte integer,
407 # fall back to the ZIP64 extension
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000408 file_size = 0xffffffff
409 compress_size = 0xffffffff
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200410 min_version = ZIP64_VERSION
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000411
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200412 if self.compress_type == ZIP_BZIP2:
413 min_version = max(BZIP2_VERSION, min_version)
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200414 elif self.compress_type == ZIP_LZMA:
415 min_version = max(LZMA_VERSION, min_version)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200416
417 self.extract_version = max(min_version, self.extract_version)
418 self.create_version = max(min_version, self.create_version)
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000419 filename, flag_bits = self._encodeFilenameFlags()
Georg Brandl2ee470f2008-07-16 12:55:28 +0000420 header = struct.pack(structFileHeader, stringFileHeader,
Christian Tismer59202e52013-10-21 03:59:23 +0200421 self.extract_version, self.reserved, flag_bits,
422 self.compress_type, dostime, dosdate, CRC,
423 compress_size, file_size,
424 len(filename), len(extra))
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000425 return header + filename + extra
426
427 def _encodeFilenameFlags(self):
428 try:
429 return self.filename.encode('ascii'), self.flag_bits
430 except UnicodeEncodeError:
431 return self.filename.encode('utf-8'), self.flag_bits | 0x800
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000432
433 def _decodeExtra(self):
434 # Try to decode the extra field.
435 extra = self.extra
436 unpack = struct.unpack
Gregory P. Smith0af8a862014-05-29 23:42:14 -0700437 while len(extra) >= 4:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000438 tp, ln = unpack('<HH', extra[:4])
Serhiy Storchakafeccdb22017-03-09 18:34:03 +0200439 if ln+4 > len(extra):
440 raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
441 if tp == 0x0001:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000442 if ln >= 24:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000443 counts = unpack('<QQQ', extra[4:28])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000444 elif ln == 16:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000445 counts = unpack('<QQ', extra[4:20])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000446 elif ln == 8:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000447 counts = unpack('<Q', extra[4:12])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000448 elif ln == 0:
449 counts = ()
450 else:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300451 raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000452
453 idx = 0
454
455 # ZIP64 extension (large files and/or large archives)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000456 if self.file_size in (0xffffffffffffffff, 0xffffffff):
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000457 self.file_size = counts[idx]
458 idx += 1
459
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000460 if self.compress_size == 0xFFFFFFFF:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000461 self.compress_size = counts[idx]
462 idx += 1
463
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000464 if self.header_offset == 0xffffffff:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000465 old = self.header_offset
466 self.header_offset = counts[idx]
467 idx+=1
468
469 extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000470
Serhiy Storchaka503f9082016-02-08 00:02:25 +0200471 @classmethod
472 def from_file(cls, filename, arcname=None):
473 """Construct an appropriate ZipInfo for a file on the filesystem.
474
475 filename should be the path to a file or directory on the filesystem.
476
477 arcname is the name which it will have within the archive (by default,
478 this will be the same as filename, but without a drive letter and with
479 leading path separators removed).
480 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +0200481 if isinstance(filename, os.PathLike):
482 filename = os.fspath(filename)
Serhiy Storchaka503f9082016-02-08 00:02:25 +0200483 st = os.stat(filename)
484 isdir = stat.S_ISDIR(st.st_mode)
485 mtime = time.localtime(st.st_mtime)
486 date_time = mtime[0:6]
487 # Create ZipInfo instance to store file information
488 if arcname is None:
489 arcname = filename
490 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
491 while arcname[0] in (os.sep, os.altsep):
492 arcname = arcname[1:]
493 if isdir:
494 arcname += '/'
495 zinfo = cls(arcname, date_time)
496 zinfo.external_attr = (st.st_mode & 0xFFFF) << 16 # Unix attributes
497 if isdir:
498 zinfo.file_size = 0
499 zinfo.external_attr |= 0x10 # MS-DOS directory flag
500 else:
501 zinfo.file_size = st.st_size
502
503 return zinfo
504
505 def is_dir(self):
Serhiy Storchakaf47fc552016-05-15 12:27:16 +0300506 """Return True if this archive member is a directory."""
Serhiy Storchaka503f9082016-02-08 00:02:25 +0200507 return self.filename[-1] == '/'
508
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000509
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300510# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
511# internal keys. We noticed that a direct implementation is faster than
512# relying on binascii.crc32().
Thomas Wouterscf297e42007-02-23 15:07:44 +0000513
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300514_crctable = None
515def _gen_crc(crc):
516 for j in range(8):
517 if crc & 1:
518 crc = (crc >> 1) ^ 0xEDB88320
519 else:
520 crc >>= 1
521 return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000522
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300523# ZIP supports a password-based form of encryption. Even though known
524# plaintext attacks have been found against it, it is still useful
525# to be able to get data out of such a file.
526#
527# Usage:
528# zd = _ZipDecrypter(mypwd)
529# plain_bytes = zd(cypher_bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000530
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300531def _ZipDecrypter(pwd):
532 key0 = 305419896
533 key1 = 591751049
534 key2 = 878082192
Thomas Wouterscf297e42007-02-23 15:07:44 +0000535
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300536 global _crctable
537 if _crctable is None:
538 _crctable = list(map(_gen_crc, range(256)))
539 crctable = _crctable
Thomas Wouterscf297e42007-02-23 15:07:44 +0000540
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300541 def crc32(ch, crc):
Thomas Wouterscf297e42007-02-23 15:07:44 +0000542 """Compute the CRC32 primitive on one byte."""
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300543 return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000544
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300545 def update_keys(c):
546 nonlocal key0, key1, key2
547 key0 = crc32(c, key0)
548 key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
549 key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
550 key2 = crc32(key1 >> 24, key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000551
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300552 for p in pwd:
553 update_keys(p)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000554
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300555 def decrypter(data):
556 """Decrypt a bytes object."""
557 result = bytearray()
558 append = result.append
559 for c in data:
560 k = key2 | 2
561 c ^= ((k * (k^1)) >> 8) & 0xFF
562 update_keys(c)
563 append(c)
564 return bytes(result)
565
566 return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000567
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200568
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200569class LZMACompressor:
570
571 def __init__(self):
572 self._comp = None
573
574 def _init(self):
Nadeem Vawdaa425c3d2012-06-21 23:36:48 +0200575 props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200576 self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
Christian Tismer59202e52013-10-21 03:59:23 +0200577 lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200578 ])
579 return struct.pack('<BBH', 9, 4, len(props)) + props
580
581 def compress(self, data):
582 if self._comp is None:
583 return self._init() + self._comp.compress(data)
584 return self._comp.compress(data)
585
586 def flush(self):
587 if self._comp is None:
588 return self._init() + self._comp.flush()
589 return self._comp.flush()
590
591
592class LZMADecompressor:
593
594 def __init__(self):
595 self._decomp = None
596 self._unconsumed = b''
597 self.eof = False
598
599 def decompress(self, data):
600 if self._decomp is None:
601 self._unconsumed += data
602 if len(self._unconsumed) <= 4:
603 return b''
604 psize, = struct.unpack('<H', self._unconsumed[2:4])
605 if len(self._unconsumed) <= 4 + psize:
606 return b''
607
608 self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
Christian Tismer59202e52013-10-21 03:59:23 +0200609 lzma._decode_filter_properties(lzma.FILTER_LZMA1,
610 self._unconsumed[4:4 + psize])
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200611 ])
612 data = self._unconsumed[4 + psize:]
613 del self._unconsumed
614
615 result = self._decomp.decompress(data)
616 self.eof = self._decomp.eof
617 return result
618
619
620compressor_names = {
621 0: 'store',
622 1: 'shrink',
623 2: 'reduce',
624 3: 'reduce',
625 4: 'reduce',
626 5: 'reduce',
627 6: 'implode',
628 7: 'tokenize',
629 8: 'deflate',
630 9: 'deflate64',
631 10: 'implode',
632 12: 'bzip2',
633 14: 'lzma',
634 18: 'terse',
635 19: 'lz77',
636 97: 'wavpack',
637 98: 'ppmd',
638}
639
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200640def _check_compression(compression):
641 if compression == ZIP_STORED:
642 pass
643 elif compression == ZIP_DEFLATED:
644 if not zlib:
645 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +0200646 "Compression requires the (missing) zlib module")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200647 elif compression == ZIP_BZIP2:
648 if not bz2:
649 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +0200650 "Compression requires the (missing) bz2 module")
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200651 elif compression == ZIP_LZMA:
652 if not lzma:
653 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +0200654 "Compression requires the (missing) lzma module")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200655 else:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300656 raise NotImplementedError("That compression method is not supported")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200657
658
Bo Baylesce237c72018-01-29 23:54:07 -0600659def _get_compressor(compress_type, compresslevel=None):
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200660 if compress_type == ZIP_DEFLATED:
Bo Baylesce237c72018-01-29 23:54:07 -0600661 if compresslevel is not None:
662 return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
663 return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200664 elif compress_type == ZIP_BZIP2:
Bo Baylesce237c72018-01-29 23:54:07 -0600665 if compresslevel is not None:
666 return bz2.BZ2Compressor(compresslevel)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200667 return bz2.BZ2Compressor()
Bo Baylesce237c72018-01-29 23:54:07 -0600668 # compresslevel is ignored for ZIP_LZMA
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200669 elif compress_type == ZIP_LZMA:
670 return LZMACompressor()
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200671 else:
672 return None
673
674
675def _get_decompressor(compress_type):
Martin v. Löwisb3260f02012-05-01 08:38:01 +0200676 if compress_type == ZIP_STORED:
677 return None
678 elif compress_type == ZIP_DEFLATED:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200679 return zlib.decompressobj(-15)
680 elif compress_type == ZIP_BZIP2:
681 return bz2.BZ2Decompressor()
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200682 elif compress_type == ZIP_LZMA:
683 return LZMADecompressor()
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200684 else:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200685 descr = compressor_names.get(compress_type)
Martin v. Löwisb3260f02012-05-01 08:38:01 +0200686 if descr:
687 raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
688 else:
689 raise NotImplementedError("compression type %d" % (compress_type,))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200690
691
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200692class _SharedFile:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300693 def __init__(self, file, pos, close, lock, writing):
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200694 self._file = file
695 self._pos = pos
696 self._close = close
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200697 self._lock = lock
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300698 self._writing = writing
John Jolly066df4f2018-01-30 01:51:35 -0700699 self.seekable = file.seekable
700 self.tell = file.tell
701
702 def seek(self, offset, whence=0):
703 with self._lock:
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200704 if self._writing():
John Jolly066df4f2018-01-30 01:51:35 -0700705 raise ValueError("Can't reposition in the ZIP file while "
706 "there is an open writing handle on it. "
707 "Close the writing handle before trying to read.")
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200708 self._file.seek(offset, whence)
John Jolly066df4f2018-01-30 01:51:35 -0700709 self._pos = self._file.tell()
710 return self._pos
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200711
712 def read(self, n=-1):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200713 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300714 if self._writing():
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300715 raise ValueError("Can't read from the ZIP file while there "
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300716 "is an open writing handle on it. "
717 "Close the writing handle before trying to read.")
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200718 self._file.seek(self._pos)
719 data = self._file.read(n)
720 self._pos = self._file.tell()
721 return data
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200722
723 def close(self):
724 if self._file is not None:
725 fileobj = self._file
726 self._file = None
727 self._close(fileobj)
728
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200729# Provide the tell method for unseekable stream
730class _Tellable:
731 def __init__(self, fp):
732 self.fp = fp
733 self.offset = 0
734
735 def write(self, data):
736 n = self.fp.write(data)
737 self.offset += n
738 return n
739
740 def tell(self):
741 return self.offset
742
743 def flush(self):
744 self.fp.flush()
745
746 def close(self):
747 self.fp.close()
748
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200749
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000750class ZipExtFile(io.BufferedIOBase):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000751 """File-like object for reading an archive member.
752 Is returned by ZipFile.open().
753 """
754
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000755 # Max size supported by decompressor.
756 MAX_N = 1 << 31 - 1
Guido van Rossumd8faa362007-04-27 19:54:29 +0000757
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000758 # Read from compressed files in 4k blocks.
759 MIN_READ_SIZE = 4096
Guido van Rossumd8faa362007-04-27 19:54:29 +0000760
John Jolly066df4f2018-01-30 01:51:35 -0700761 # Chunk size to read during seek
762 MAX_SEEK_READ = 1 << 24
763
Łukasz Langae94980a2010-11-22 23:31:26 +0000764 def __init__(self, fileobj, mode, zipinfo, decrypter=None,
765 close_fileobj=False):
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000766 self._fileobj = fileobj
767 self._decrypter = decrypter
Łukasz Langae94980a2010-11-22 23:31:26 +0000768 self._close_fileobj = close_fileobj
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000769
Ezio Melotti92b47432010-01-28 01:44:41 +0000770 self._compress_type = zipinfo.compress_type
Ezio Melotti92b47432010-01-28 01:44:41 +0000771 self._compress_left = zipinfo.compress_size
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200772 self._left = zipinfo.file_size
Ezio Melotti92b47432010-01-28 01:44:41 +0000773
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200774 self._decompressor = _get_decompressor(self._compress_type)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000775
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200776 self._eof = False
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000777 self._readbuffer = b''
778 self._offset = 0
779
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000780 self.newlines = None
781
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000782 # Adjust read size for encrypted files since the first 12 bytes
783 # are for the encryption/password information.
784 if self._decrypter is not None:
785 self._compress_left -= 12
786
787 self.mode = mode
Guido van Rossumd8faa362007-04-27 19:54:29 +0000788 self.name = zipinfo.filename
789
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000790 if hasattr(zipinfo, 'CRC'):
791 self._expected_crc = zipinfo.CRC
Martin Panterb82032f2015-12-11 05:19:29 +0000792 self._running_crc = crc32(b'')
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000793 else:
794 self._expected_crc = None
795
John Jolly066df4f2018-01-30 01:51:35 -0700796 self._seekable = False
797 try:
798 if fileobj.seekable():
799 self._orig_compress_start = fileobj.tell()
800 self._orig_compress_size = zipinfo.compress_size
801 self._orig_file_size = zipinfo.file_size
802 self._orig_start_crc = self._running_crc
803 self._seekable = True
804 except AttributeError:
805 pass
806
Serhiy Storchaka51a43702014-10-29 22:42:06 +0200807 def __repr__(self):
808 result = ['<%s.%s' % (self.__class__.__module__,
809 self.__class__.__qualname__)]
810 if not self.closed:
811 result.append(' name=%r mode=%r' % (self.name, self.mode))
812 if self._compress_type != ZIP_STORED:
813 result.append(' compress_type=%s' %
814 compressor_names.get(self._compress_type,
815 self._compress_type))
816 else:
817 result.append(' [closed]')
818 result.append('>')
819 return ''.join(result)
820
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000821 def readline(self, limit=-1):
822 """Read and return a line from the stream.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000823
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000824 If limit is specified, at most limit bytes will be read.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000825 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000826
Serhiy Storchakae670be22016-06-11 19:32:44 +0300827 if limit < 0:
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000828 # Shortcut common case - newline found in buffer.
829 i = self._readbuffer.find(b'\n', self._offset) + 1
830 if i > 0:
831 line = self._readbuffer[self._offset: i]
832 self._offset = i
833 return line
Guido van Rossumd8faa362007-04-27 19:54:29 +0000834
Serhiy Storchakae670be22016-06-11 19:32:44 +0300835 return io.BufferedIOBase.readline(self, limit)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000836
837 def peek(self, n=1):
838 """Returns buffered bytes without advancing the position."""
839 if n > len(self._readbuffer) - self._offset:
840 chunk = self.read(n)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200841 if len(chunk) > self._offset:
842 self._readbuffer = chunk + self._readbuffer[self._offset:]
843 self._offset = 0
844 else:
845 self._offset -= len(chunk)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000846
847 # Return up to 512 bytes to reduce allocation overhead for tight loops.
848 return self._readbuffer[self._offset: self._offset + 512]
849
850 def readable(self):
851 return True
852
853 def read(self, n=-1):
854 """Read and return up to n bytes.
855 If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
Guido van Rossumd8faa362007-04-27 19:54:29 +0000856 """
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200857 if n is None or n < 0:
858 buf = self._readbuffer[self._offset:]
859 self._readbuffer = b''
860 self._offset = 0
861 while not self._eof:
862 buf += self._read1(self.MAX_N)
863 return buf
Guido van Rossumd8faa362007-04-27 19:54:29 +0000864
Antoine Pitrou78157b32012-06-23 16:44:48 +0200865 end = n + self._offset
866 if end < len(self._readbuffer):
867 buf = self._readbuffer[self._offset:end]
868 self._offset = end
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200869 return buf
870
Antoine Pitrou78157b32012-06-23 16:44:48 +0200871 n = end - len(self._readbuffer)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200872 buf = self._readbuffer[self._offset:]
873 self._readbuffer = b''
874 self._offset = 0
875 while n > 0 and not self._eof:
876 data = self._read1(n)
877 if n < len(data):
878 self._readbuffer = data
879 self._offset = n
880 buf += data[:n]
881 break
882 buf += data
883 n -= len(data)
884 return buf
885
886 def _update_crc(self, newdata):
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000887 # Update the CRC using the given data.
888 if self._expected_crc is None:
889 # No need to compute the CRC if we don't have a reference value
890 return
Martin Panterb82032f2015-12-11 05:19:29 +0000891 self._running_crc = crc32(newdata, self._running_crc)
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000892 # Check the CRC if we're at the end of the file
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200893 if self._eof and self._running_crc != self._expected_crc:
Georg Brandl4d540882010-10-28 06:42:33 +0000894 raise BadZipFile("Bad CRC-32 for file %r" % self.name)
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000895
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000896 def read1(self, n):
897 """Read up to n bytes with at most one read() system call."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000898
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200899 if n is None or n < 0:
900 buf = self._readbuffer[self._offset:]
901 self._readbuffer = b''
902 self._offset = 0
Serhiy Storchakad2c07a52013-09-27 22:11:57 +0300903 while not self._eof:
904 data = self._read1(self.MAX_N)
905 if data:
906 buf += data
907 break
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200908 return buf
Guido van Rossumd8faa362007-04-27 19:54:29 +0000909
Antoine Pitrou78157b32012-06-23 16:44:48 +0200910 end = n + self._offset
911 if end < len(self._readbuffer):
912 buf = self._readbuffer[self._offset:end]
913 self._offset = end
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200914 return buf
915
Antoine Pitrou78157b32012-06-23 16:44:48 +0200916 n = end - len(self._readbuffer)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200917 buf = self._readbuffer[self._offset:]
918 self._readbuffer = b''
919 self._offset = 0
920 if n > 0:
Serhiy Storchakad2c07a52013-09-27 22:11:57 +0300921 while not self._eof:
922 data = self._read1(n)
923 if n < len(data):
924 self._readbuffer = data
925 self._offset = n
926 buf += data[:n]
927 break
928 if data:
929 buf += data
930 break
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200931 return buf
932
933 def _read1(self, n):
934 # Read up to n compressed bytes with at most one read() system call,
935 # decrypt and decompress them.
936 if self._eof or n <= 0:
937 return b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000938
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000939 # Read from file.
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200940 if self._compress_type == ZIP_DEFLATED:
941 ## Handle unconsumed data.
942 data = self._decompressor.unconsumed_tail
943 if n > len(data):
944 data += self._read2(n - len(data))
945 else:
946 data = self._read2(n)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000947
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200948 if self._compress_type == ZIP_STORED:
949 self._eof = self._compress_left <= 0
950 elif self._compress_type == ZIP_DEFLATED:
951 n = max(n, self.MIN_READ_SIZE)
952 data = self._decompressor.decompress(data, n)
953 self._eof = (self._decompressor.eof or
Christian Tismer59202e52013-10-21 03:59:23 +0200954 self._compress_left <= 0 and
955 not self._decompressor.unconsumed_tail)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200956 if self._eof:
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000957 data += self._decompressor.flush()
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200958 else:
959 data = self._decompressor.decompress(data)
960 self._eof = self._decompressor.eof or self._compress_left <= 0
Guido van Rossumd8faa362007-04-27 19:54:29 +0000961
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200962 data = data[:self._left]
963 self._left -= len(data)
964 if self._left <= 0:
965 self._eof = True
966 self._update_crc(data)
967 return data
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000968
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200969 def _read2(self, n):
970 if self._compress_left <= 0:
971 return b''
972
973 n = max(n, self.MIN_READ_SIZE)
974 n = min(n, self._compress_left)
975
976 data = self._fileobj.read(n)
977 self._compress_left -= len(data)
Serhiy Storchaka5ce3f102014-01-09 14:50:20 +0200978 if not data:
979 raise EOFError
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200980
981 if self._decrypter is not None:
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300982 data = self._decrypter(data)
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000983 return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000984
Łukasz Langae94980a2010-11-22 23:31:26 +0000985 def close(self):
986 try:
987 if self._close_fileobj:
988 self._fileobj.close()
989 finally:
990 super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000991
John Jolly066df4f2018-01-30 01:51:35 -0700992 def seekable(self):
993 return self._seekable
994
995 def seek(self, offset, whence=0):
996 if not self._seekable:
997 raise io.UnsupportedOperation("underlying stream is not seekable")
998 curr_pos = self.tell()
999 if whence == 0: # Seek from start of file
1000 new_pos = offset
1001 elif whence == 1: # Seek from current position
1002 new_pos = curr_pos + offset
1003 elif whence == 2: # Seek from EOF
1004 new_pos = self._orig_file_size + offset
1005 else:
1006 raise ValueError("whence must be os.SEEK_SET (0), "
1007 "os.SEEK_CUR (1), or os.SEEK_END (2)")
1008
1009 if new_pos > self._orig_file_size:
1010 new_pos = self._orig_file_size
1011
1012 if new_pos < 0:
1013 new_pos = 0
1014
1015 read_offset = new_pos - curr_pos
1016 buff_offset = read_offset + self._offset
1017
1018 if buff_offset >= 0 and buff_offset < len(self._readbuffer):
1019 # Just move the _offset index if the new position is in the _readbuffer
1020 self._offset = buff_offset
1021 read_offset = 0
1022 elif read_offset < 0:
1023 # Position is before the current position. Reset the ZipExtFile
John Jolly066df4f2018-01-30 01:51:35 -07001024 self._fileobj.seek(self._orig_compress_start)
1025 self._running_crc = self._orig_start_crc
1026 self._compress_left = self._orig_compress_size
1027 self._left = self._orig_file_size
1028 self._readbuffer = b''
1029 self._offset = 0
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +02001030 self._decompressor = _get_decompressor(self._compress_type)
John Jolly066df4f2018-01-30 01:51:35 -07001031 self._eof = False
1032 read_offset = new_pos
1033
1034 while read_offset > 0:
1035 read_len = min(self.MAX_SEEK_READ, read_offset)
1036 self.read(read_len)
1037 read_offset -= read_len
1038
1039 return self.tell()
1040
1041 def tell(self):
1042 if not self._seekable:
1043 raise io.UnsupportedOperation("underlying stream is not seekable")
1044 filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
1045 return filepos
1046
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001047
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001048class _ZipWriteFile(io.BufferedIOBase):
1049 def __init__(self, zf, zinfo, zip64):
1050 self._zinfo = zinfo
1051 self._zip64 = zip64
1052 self._zipfile = zf
Bo Baylesce237c72018-01-29 23:54:07 -06001053 self._compressor = _get_compressor(zinfo.compress_type,
1054 zinfo._compresslevel)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001055 self._file_size = 0
1056 self._compress_size = 0
1057 self._crc = 0
1058
1059 @property
1060 def _fileobj(self):
1061 return self._zipfile.fp
1062
1063 def writable(self):
1064 return True
1065
1066 def write(self, data):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001067 if self.closed:
1068 raise ValueError('I/O operation on closed file.')
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001069 nbytes = len(data)
1070 self._file_size += nbytes
1071 self._crc = crc32(data, self._crc)
1072 if self._compressor:
1073 data = self._compressor.compress(data)
1074 self._compress_size += len(data)
1075 self._fileobj.write(data)
1076 return nbytes
1077
1078 def close(self):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001079 if self.closed:
1080 return
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001081 super().close()
1082 # Flush any data from the compressor, and update header info
1083 if self._compressor:
1084 buf = self._compressor.flush()
1085 self._compress_size += len(buf)
1086 self._fileobj.write(buf)
1087 self._zinfo.compress_size = self._compress_size
1088 else:
1089 self._zinfo.compress_size = self._file_size
1090 self._zinfo.CRC = self._crc
1091 self._zinfo.file_size = self._file_size
1092
1093 # Write updated header info
1094 if self._zinfo.flag_bits & 0x08:
1095 # Write CRC and file sizes after the file data
1096 fmt = '<LQQ' if self._zip64 else '<LLL'
1097 self._fileobj.write(struct.pack(fmt, self._zinfo.CRC,
1098 self._zinfo.compress_size, self._zinfo.file_size))
1099 self._zipfile.start_dir = self._fileobj.tell()
1100 else:
1101 if not self._zip64:
1102 if self._file_size > ZIP64_LIMIT:
1103 raise RuntimeError('File size unexpectedly exceeded ZIP64 '
1104 'limit')
1105 if self._compress_size > ZIP64_LIMIT:
1106 raise RuntimeError('Compressed size unexpectedly exceeded '
1107 'ZIP64 limit')
1108 # Seek backwards and write file header (which will now include
1109 # correct CRC and file sizes)
1110
1111 # Preserve current position in file
1112 self._zipfile.start_dir = self._fileobj.tell()
1113 self._fileobj.seek(self._zinfo.header_offset)
1114 self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1115 self._fileobj.seek(self._zipfile.start_dir)
1116
1117 self._zipfile._writing = False
1118
1119 # Successfully written: Add file to our caches
1120 self._zipfile.filelist.append(self._zinfo)
1121 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1122
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001123class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001124 """ Class with methods to open, read, write, close, list zip files.
1125
Bo Baylesce237c72018-01-29 23:54:07 -06001126 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1127 compresslevel=None)
Tim Petersa19a1682001-03-29 04:36:09 +00001128
Fred Drake3d9091e2001-03-26 15:49:24 +00001129 file: Either the path to the file, or a file-like object.
1130 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001131 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1132 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001133 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1134 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001135 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1136 needed, otherwise it will raise an exception when this would
1137 be necessary.
Bo Baylesce237c72018-01-29 23:54:07 -06001138 compresslevel: None (default for the given compression type) or an integer
1139 specifying the level to pass to the compressor.
1140 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1141 When using ZIP_DEFLATED integers 0 through 9 are accepted.
1142 When using ZIP_BZIP2 integers 1 through 9 are accepted.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001143
Fred Drake3d9091e2001-03-26 15:49:24 +00001144 """
Fred Drake484d7352000-10-02 21:14:52 +00001145
Fred Drake90eac282001-02-28 05:29:34 +00001146 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001147 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001148
Bo Baylesce237c72018-01-29 23:54:07 -06001149 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
1150 compresslevel=None):
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001151 """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
1152 or append 'a'."""
1153 if mode not in ('r', 'w', 'x', 'a'):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001154 raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001155
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001156 _check_compression(compression)
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001157
1158 self._allowZip64 = allowZip64
1159 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +00001160 self.debug = 0 # Level of printing: 0 through 3
1161 self.NameToInfo = {} # Find file info given name
1162 self.filelist = [] # List of ZipInfo instances for archive
1163 self.compression = compression # Method of compression
Bo Baylesce237c72018-01-29 23:54:07 -06001164 self.compresslevel = compresslevel
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001165 self.mode = mode
Thomas Wouterscf297e42007-02-23 15:07:44 +00001166 self.pwd = None
R David Murrayf50b38a2012-04-12 18:44:58 -04001167 self._comment = b''
Tim Petersa19a1682001-03-29 04:36:09 +00001168
Fred Drake3d9091e2001-03-26 15:49:24 +00001169 # Check if we were passed a file-like object
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001170 if isinstance(file, os.PathLike):
1171 file = os.fspath(file)
Guido van Rossum3172c5d2007-10-16 18:12:55 +00001172 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001173 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +00001174 self._filePassed = 0
1175 self.filename = file
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001176 modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
1177 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001178 filemode = modeDict[mode]
1179 while True:
1180 try:
1181 self.fp = io.open(file, filemode)
1182 except OSError:
1183 if filemode in modeDict:
1184 filemode = modeDict[filemode]
1185 continue
Thomas Wouterscf297e42007-02-23 15:07:44 +00001186 raise
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001187 break
Fred Drake3d9091e2001-03-26 15:49:24 +00001188 else:
1189 self._filePassed = 1
1190 self.fp = file
1191 self.filename = getattr(file, 'name', None)
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001192 self._fileRefCnt = 1
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001193 self._lock = threading.RLock()
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001194 self._seekable = True
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001195 self._writing = False
Tim Petersa19a1682001-03-29 04:36:09 +00001196
Antoine Pitrou17babc52012-11-17 23:50:08 +01001197 try:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001198 if mode == 'r':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001199 self._RealGetContents()
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001200 elif mode in ('w', 'x'):
Georg Brandl268e4d42010-10-14 06:59:45 +00001201 # set the modified flag so central directory gets written
1202 # even if no files are added to the archive
1203 self._didModify = True
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001204 try:
Serhiy Storchaka34cba332017-01-01 19:00:30 +02001205 self.start_dir = self.fp.tell()
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001206 except (AttributeError, OSError):
1207 self.fp = _Tellable(self.fp)
Serhiy Storchaka34cba332017-01-01 19:00:30 +02001208 self.start_dir = 0
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001209 self._seekable = False
1210 else:
1211 # Some file-like objects can provide tell() but not seek()
1212 try:
1213 self.fp.seek(self.start_dir)
1214 except (AttributeError, OSError):
1215 self._seekable = False
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001216 elif mode == 'a':
Antoine Pitrou17babc52012-11-17 23:50:08 +01001217 try:
1218 # See if file is a zip file
1219 self._RealGetContents()
1220 # seek to start of directory and overwrite
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001221 self.fp.seek(self.start_dir)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001222 except BadZipFile:
1223 # file is not a zip file, just append
1224 self.fp.seek(0, 2)
1225
1226 # set the modified flag so central directory gets written
1227 # even if no files are added to the archive
1228 self._didModify = True
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001229 self.start_dir = self.fp.tell()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001230 else:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001231 raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
Antoine Pitrou17babc52012-11-17 23:50:08 +01001232 except:
1233 fp = self.fp
1234 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001235 self._fpclose(fp)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001236 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001237
Ezio Melottifaa6b7f2009-12-30 12:34:59 +00001238 def __enter__(self):
1239 return self
1240
1241 def __exit__(self, type, value, traceback):
1242 self.close()
1243
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001244 def __repr__(self):
1245 result = ['<%s.%s' % (self.__class__.__module__,
1246 self.__class__.__qualname__)]
1247 if self.fp is not None:
1248 if self._filePassed:
1249 result.append(' file=%r' % self.fp)
1250 elif self.filename is not None:
1251 result.append(' filename=%r' % self.filename)
1252 result.append(' mode=%r' % self.mode)
1253 else:
1254 result.append(' [closed]')
1255 result.append('>')
1256 return ''.join(result)
1257
Tim Peters7d3bad62001-04-04 18:56:49 +00001258 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +00001259 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001260 fp = self.fp
Georg Brandl268e4d42010-10-14 06:59:45 +00001261 try:
1262 endrec = _EndRecData(fp)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001263 except OSError:
Georg Brandl4d540882010-10-28 06:42:33 +00001264 raise BadZipFile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001265 if not endrec:
Georg Brandl4d540882010-10-28 06:42:33 +00001266 raise BadZipFile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001267 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001268 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001269 size_cd = endrec[_ECD_SIZE] # bytes in central directory
1270 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
R David Murrayf50b38a2012-04-12 18:44:58 -04001271 self._comment = endrec[_ECD_COMMENT] # archive comment
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001272
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001273 # "concat" is zero, unless zip was concatenated to another file
1274 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +00001275 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1276 # If Zip64 extension structures are present, account for them
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001277 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001278
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001279 if self.debug > 2:
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001280 inferred = concat + offset_cd
1281 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001282 # self.start_dir: Position of start of central directory
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001283 self.start_dir = offset_cd + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001284 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001285 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001286 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001287 total = 0
1288 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001289 centdir = fp.read(sizeCentralDir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001290 if len(centdir) != sizeCentralDir:
1291 raise BadZipFile("Truncated central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001292 centdir = struct.unpack(structCentralDir, centdir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001293 if centdir[_CD_SIGNATURE] != stringCentralDir:
1294 raise BadZipFile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001295 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001296 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +00001297 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001298 flags = centdir[5]
1299 if flags & 0x800:
1300 # UTF-8 file names extension
1301 filename = filename.decode('utf-8')
1302 else:
1303 # Historical ZIP filename encoding
1304 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001305 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001306 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +00001307 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1308 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001309 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001310 (x.create_version, x.create_system, x.extract_version, x.reserved,
Christian Tismer59202e52013-10-21 03:59:23 +02001311 x.flag_bits, x.compress_type, t, d,
1312 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
Martin v. Löwisd099b562012-05-01 14:08:22 +02001313 if x.extract_version > MAX_EXTRACT_VERSION:
1314 raise NotImplementedError("zip file version %.1f" %
1315 (x.extract_version / 10))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001316 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1317 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +00001318 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001319 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Christian Tismer59202e52013-10-21 03:59:23 +02001320 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001321
1322 x._decodeExtra()
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001323 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001324 self.filelist.append(x)
1325 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001326
1327 # update total bytes read from central directory
1328 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1329 + centdir[_CD_EXTRA_FIELD_LENGTH]
1330 + centdir[_CD_COMMENT_LENGTH])
1331
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001332 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001333 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001334
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001335
1336 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001337 """Return a list of file names in the archive."""
Ezio Melotti006917e2012-04-16 21:34:24 -06001338 return [data.filename for data in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001339
1340 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001341 """Return a list of class ZipInfo instances for files in the
1342 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001343 return self.filelist
1344
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001345 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +00001346 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001347 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
1348 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001349 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001350 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001351 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1352 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001353
1354 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001355 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001356 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001357 for zinfo in self.filelist:
1358 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001359 # Read by chunks, to avoid an OverflowError or a
1360 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001361 with self.open(zinfo.filename, "r") as f:
1362 while f.read(chunk_size): # Check CRC-32
1363 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001364 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001365 return zinfo.filename
1366
1367 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +00001368 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001369 info = self.NameToInfo.get(name)
1370 if info is None:
1371 raise KeyError(
1372 'There is no item named %r in the archive' % name)
1373
1374 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001375
Thomas Wouterscf297e42007-02-23 15:07:44 +00001376 def setpassword(self, pwd):
1377 """Set default password for encrypted files."""
R. David Murray8d855d82010-12-21 21:53:37 +00001378 if pwd and not isinstance(pwd, bytes):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001379 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
R. David Murray8d855d82010-12-21 21:53:37 +00001380 if pwd:
1381 self.pwd = pwd
1382 else:
1383 self.pwd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001384
R David Murrayf50b38a2012-04-12 18:44:58 -04001385 @property
1386 def comment(self):
1387 """The comment text associated with the ZIP file."""
1388 return self._comment
1389
1390 @comment.setter
1391 def comment(self, comment):
1392 if not isinstance(comment, bytes):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001393 raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
R David Murrayf50b38a2012-04-12 18:44:58 -04001394 # check for valid comment length
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001395 if len(comment) > ZIP_MAX_COMMENT:
1396 import warnings
1397 warnings.warn('Archive comment is too long; truncating to %d bytes'
1398 % ZIP_MAX_COMMENT, stacklevel=2)
R David Murrayf50b38a2012-04-12 18:44:58 -04001399 comment = comment[:ZIP_MAX_COMMENT]
1400 self._comment = comment
1401 self._didModify = True
1402
Thomas Wouterscf297e42007-02-23 15:07:44 +00001403 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +00001404 """Return file bytes (as a string) for name."""
Benjamin Petersond285bdb2010-10-31 17:57:22 +00001405 with self.open(name, "r", pwd) as fp:
1406 return fp.read()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001407
Serhiy Storchakaf47fc552016-05-15 12:27:16 +03001408 def open(self, name, mode="r", pwd=None, *, force_zip64=False):
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001409 """Return file-like object for 'name'.
1410
1411 name is a string for the file name within the ZIP file, or a ZipInfo
1412 object.
1413
1414 mode should be 'r' to read a file already in the ZIP file, or 'w' to
1415 write to a file newly added to the archive.
1416
1417 pwd is the password to decrypt files (only used for reading).
1418
1419 When writing, if the file size is not known in advance but may exceed
1420 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
1421 files. If the size is known in advance, it is best to pass a ZipInfo
1422 instance for name, with zinfo.file_size set.
1423 """
Serhiy Storchakae670be22016-06-11 19:32:44 +03001424 if mode not in {"r", "w"}:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001425 raise ValueError('open() requires mode "r" or "w"')
R. David Murray8d855d82010-12-21 21:53:37 +00001426 if pwd and not isinstance(pwd, bytes):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001427 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001428 if pwd and (mode == "w"):
1429 raise ValueError("pwd is only supported for reading files")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001430 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001431 raise ValueError(
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001432 "Attempt to use ZIP archive that was already closed")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001433
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001434 # Make sure we have an info object
1435 if isinstance(name, ZipInfo):
1436 # 'name' is already an info object
1437 zinfo = name
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001438 elif mode == 'w':
1439 zinfo = ZipInfo(name)
1440 zinfo.compress_type = self.compression
Bo Baylesce237c72018-01-29 23:54:07 -06001441 zinfo._compresslevel = self.compresslevel
Guido van Rossumd8faa362007-04-27 19:54:29 +00001442 else:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001443 # Get info object for name
1444 zinfo = self.getinfo(name)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001445
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001446 if mode == 'w':
1447 return self._open_to_write(zinfo, force_zip64=force_zip64)
1448
1449 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001450 raise ValueError("Can't read from the ZIP file while there "
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001451 "is an open writing handle on it. "
1452 "Close the writing handle before trying to read.")
1453
1454 # Open for reading:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001455 self._fileRefCnt += 1
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001456 zef_file = _SharedFile(self.fp, zinfo.header_offset,
1457 self._fpclose, self._lock, lambda: self._writing)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001458 try:
Antoine Pitrou17babc52012-11-17 23:50:08 +01001459 # Skip the file header:
1460 fheader = zef_file.read(sizeFileHeader)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001461 if len(fheader) != sizeFileHeader:
1462 raise BadZipFile("Truncated file header")
1463 fheader = struct.unpack(structFileHeader, fheader)
1464 if fheader[_FH_SIGNATURE] != stringFileHeader:
Antoine Pitrou17babc52012-11-17 23:50:08 +01001465 raise BadZipFile("Bad magic number for file header")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001466
Antoine Pitrou17babc52012-11-17 23:50:08 +01001467 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
1468 if fheader[_FH_EXTRA_FIELD_LENGTH]:
1469 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001470
Antoine Pitrou8572da52012-11-17 23:52:05 +01001471 if zinfo.flag_bits & 0x20:
1472 # Zip 2.7: compressed patched data
1473 raise NotImplementedError("compressed patched data (flag bit 5)")
Martin v. Löwis2a2ce322012-05-01 08:44:08 +02001474
Antoine Pitrou8572da52012-11-17 23:52:05 +01001475 if zinfo.flag_bits & 0x40:
1476 # strong encryption
1477 raise NotImplementedError("strong encryption (flag bit 6)")
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001478
Antoine Pitrou17babc52012-11-17 23:50:08 +01001479 if zinfo.flag_bits & 0x800:
1480 # UTF-8 filename
1481 fname_str = fname.decode("utf-8")
1482 else:
1483 fname_str = fname.decode("cp437")
Georg Brandl5ba11de2011-01-01 10:09:32 +00001484
Antoine Pitrou17babc52012-11-17 23:50:08 +01001485 if fname_str != zinfo.orig_filename:
1486 raise BadZipFile(
1487 'File name in directory %r and header %r differ.'
1488 % (zinfo.orig_filename, fname))
1489
1490 # check for encrypted flag & handle password
1491 is_encrypted = zinfo.flag_bits & 0x1
1492 zd = None
1493 if is_encrypted:
1494 if not pwd:
1495 pwd = self.pwd
1496 if not pwd:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001497 raise RuntimeError("File %r is encrypted, password "
Antoine Pitrou17babc52012-11-17 23:50:08 +01001498 "required for extraction" % name)
1499
1500 zd = _ZipDecrypter(pwd)
1501 # The first 12 bytes in the cypher stream is an encryption header
1502 # used to strengthen the algorithm. The first 11 bytes are
1503 # completely random, while the 12th contains the MSB of the CRC,
1504 # or the MSB of the file time depending on the header type
1505 # and is used to check the correctness of the password.
1506 header = zef_file.read(12)
Serhiy Storchaka06e52252017-03-30 19:09:08 +03001507 h = zd(header[0:12])
Antoine Pitrou17babc52012-11-17 23:50:08 +01001508 if zinfo.flag_bits & 0x8:
1509 # compare against the file type from extended local headers
1510 check_byte = (zinfo._raw_time >> 8) & 0xff
1511 else:
1512 # compare against the CRC otherwise
1513 check_byte = (zinfo.CRC >> 24) & 0xff
1514 if h[11] != check_byte:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001515 raise RuntimeError("Bad password for file %r" % name)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001516
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001517 return ZipExtFile(zef_file, mode, zinfo, zd, True)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001518 except:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001519 zef_file.close()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001520 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001521
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001522 def _open_to_write(self, zinfo, force_zip64=False):
1523 if force_zip64 and not self._allowZip64:
1524 raise ValueError(
1525 "force_zip64 is True, but allowZip64 was False when opening "
1526 "the ZIP file."
1527 )
1528 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001529 raise ValueError("Can't write to the ZIP file while there is "
1530 "another write handle open on it. "
1531 "Close the first handle before opening another.")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001532
1533 # Sizes and CRC are overwritten with correct data after processing the file
1534 if not hasattr(zinfo, 'file_size'):
1535 zinfo.file_size = 0
1536 zinfo.compress_size = 0
1537 zinfo.CRC = 0
1538
1539 zinfo.flag_bits = 0x00
1540 if zinfo.compress_type == ZIP_LZMA:
1541 # Compressed data includes an end-of-stream (EOS) marker
1542 zinfo.flag_bits |= 0x02
1543 if not self._seekable:
1544 zinfo.flag_bits |= 0x08
1545
1546 if not zinfo.external_attr:
1547 zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
1548
1549 # Compressed size can be larger than uncompressed size
1550 zip64 = self._allowZip64 and \
1551 (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1552
1553 if self._seekable:
1554 self.fp.seek(self.start_dir)
1555 zinfo.header_offset = self.fp.tell()
1556
1557 self._writecheck(zinfo)
1558 self._didModify = True
1559
1560 self.fp.write(zinfo.FileHeader(zip64))
1561
1562 self._writing = True
1563 return _ZipWriteFile(self, zinfo, zip64)
1564
Christian Heimes790c8232008-01-07 21:14:23 +00001565 def extract(self, member, path=None, pwd=None):
1566 """Extract a member from the archive to the current working directory,
1567 using its full name. Its file information is extracted as accurately
1568 as possible. `member' may be a filename or a ZipInfo object. You can
1569 specify a different directory using `path'.
1570 """
Christian Heimes790c8232008-01-07 21:14:23 +00001571 if path is None:
1572 path = os.getcwd()
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001573 else:
1574 path = os.fspath(path)
Christian Heimes790c8232008-01-07 21:14:23 +00001575
1576 return self._extract_member(member, path, pwd)
1577
1578 def extractall(self, path=None, members=None, pwd=None):
1579 """Extract all members from the archive to the current working
1580 directory. `path' specifies a different directory to extract to.
1581 `members' is optional and must be a subset of the list returned
1582 by namelist().
1583 """
1584 if members is None:
1585 members = self.namelist()
1586
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001587 if path is None:
1588 path = os.getcwd()
1589 else:
1590 path = os.fspath(path)
1591
Christian Heimes790c8232008-01-07 21:14:23 +00001592 for zipinfo in members:
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001593 self._extract_member(zipinfo, path, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001594
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001595 @classmethod
1596 def _sanitize_windows_name(cls, arcname, pathsep):
1597 """Replace bad characters and remove trailing dots from parts."""
1598 table = cls._windows_illegal_name_trans_table
1599 if not table:
1600 illegal = ':<>|"?*'
1601 table = str.maketrans(illegal, '_' * len(illegal))
1602 cls._windows_illegal_name_trans_table = table
1603 arcname = arcname.translate(table)
1604 # remove trailing dots
1605 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1606 # rejoin, removing empty parts.
1607 arcname = pathsep.join(x for x in arcname if x)
1608 return arcname
1609
Christian Heimes790c8232008-01-07 21:14:23 +00001610 def _extract_member(self, member, targetpath, pwd):
1611 """Extract the ZipInfo object 'member' to a physical
1612 file on the path targetpath.
1613 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001614 if not isinstance(member, ZipInfo):
1615 member = self.getinfo(member)
1616
Christian Heimes790c8232008-01-07 21:14:23 +00001617 # build the destination pathname, replacing
1618 # forward slashes to platform specific separators.
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001619 arcname = member.filename.replace('/', os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001620
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001621 if os.path.altsep:
1622 arcname = arcname.replace(os.path.altsep, os.path.sep)
1623 # interpret absolute pathname as relative, remove drive letter or
1624 # UNC path, redundant separators, "." and ".." components.
1625 arcname = os.path.splitdrive(arcname)[1]
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001626 invalid_path_parts = ('', os.path.curdir, os.path.pardir)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001627 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001628 if x not in invalid_path_parts)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001629 if os.path.sep == '\\':
Serhiy Storchakae5e64442013-02-02 19:50:59 +02001630 # filter illegal characters on Windows
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001631 arcname = self._sanitize_windows_name(arcname, os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001632
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001633 targetpath = os.path.join(targetpath, arcname)
Christian Heimes790c8232008-01-07 21:14:23 +00001634 targetpath = os.path.normpath(targetpath)
1635
1636 # Create all upper directories if necessary.
1637 upperdirs = os.path.dirname(targetpath)
1638 if upperdirs and not os.path.exists(upperdirs):
1639 os.makedirs(upperdirs)
1640
Serhiy Storchaka503f9082016-02-08 00:02:25 +02001641 if member.is_dir():
Martin v. Löwis70ccd162009-05-24 19:47:22 +00001642 if not os.path.isdir(targetpath):
1643 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001644 return targetpath
1645
Antoine Pitrou17babc52012-11-17 23:50:08 +01001646 with self.open(member, pwd=pwd) as source, \
1647 open(targetpath, "wb") as target:
1648 shutil.copyfileobj(source, target)
Christian Heimes790c8232008-01-07 21:14:23 +00001649
1650 return targetpath
1651
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001652 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001653 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001654 if zinfo.filename in self.NameToInfo:
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001655 import warnings
1656 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001657 if self.mode not in ('w', 'x', 'a'):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001658 raise ValueError("write() requires mode 'w', 'x', or 'a'")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001659 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001660 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001661 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001662 _check_compression(zinfo.compress_type)
Serhiy Storchakacfbb3942014-09-23 21:34:24 +03001663 if not self._allowZip64:
1664 requires_zip64 = None
1665 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1666 requires_zip64 = "Files count"
1667 elif zinfo.file_size > ZIP64_LIMIT:
1668 requires_zip64 = "Filesize"
1669 elif zinfo.header_offset > ZIP64_LIMIT:
1670 requires_zip64 = "Zipfile size"
1671 if requires_zip64:
1672 raise LargeZipFile(requires_zip64 +
1673 " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001674
Bo Baylesce237c72018-01-29 23:54:07 -06001675 def write(self, filename, arcname=None,
1676 compress_type=None, compresslevel=None):
Fred Drake484d7352000-10-02 21:14:52 +00001677 """Put the bytes from filename into the archive under the name
1678 arcname."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001679 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001680 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001681 "Attempt to write to ZIP archive that was already closed")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001682 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001683 raise ValueError(
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001684 "Can't write to ZIP archive while an open writing handle exists"
1685 )
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001686
Serhiy Storchaka503f9082016-02-08 00:02:25 +02001687 zinfo = ZipInfo.from_file(filename, arcname)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001688
Serhiy Storchaka503f9082016-02-08 00:02:25 +02001689 if zinfo.is_dir():
1690 zinfo.compress_size = 0
1691 zinfo.CRC = 0
1692 else:
1693 if compress_type is not None:
1694 zinfo.compress_type = compress_type
1695 else:
1696 zinfo.compress_type = self.compression
1697
Bo Baylesce237c72018-01-29 23:54:07 -06001698 if compresslevel is not None:
1699 zinfo._compresslevel = compresslevel
1700 else:
1701 zinfo._compresslevel = self.compresslevel
1702
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001703 if zinfo.is_dir():
1704 with self._lock:
1705 if self._seekable:
1706 self.fp.seek(self.start_dir)
1707 zinfo.header_offset = self.fp.tell() # Start of header bytes
1708 if zinfo.compress_type == ZIP_LZMA:
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001709 # Compressed data includes an end-of-stream (EOS) marker
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001710 zinfo.flag_bits |= 0x02
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001711
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001712 self._writecheck(zinfo)
1713 self._didModify = True
Martin v. Löwis59e47792009-01-24 14:10:07 +00001714
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001715 self.filelist.append(zinfo)
1716 self.NameToInfo[zinfo.filename] = zinfo
1717 self.fp.write(zinfo.FileHeader(False))
1718 self.start_dir = self.fp.tell()
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001719 else:
1720 with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
1721 shutil.copyfileobj(src, dest, 1024*8)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001722
Bo Baylesce237c72018-01-29 23:54:07 -06001723 def writestr(self, zinfo_or_arcname, data,
1724 compress_type=None, compresslevel=None):
Guido van Rossum85825dc2007-08-27 17:03:28 +00001725 """Write a file into the archive. The contents is 'data', which
1726 may be either a 'str' or a 'bytes' instance; if it is a 'str',
1727 it is encoded as UTF-8 first.
1728 'zinfo_or_arcname' is either a ZipInfo instance or
Just van Rossumb083cb32002-12-12 12:23:32 +00001729 the name of the file in the archive."""
Guido van Rossum85825dc2007-08-27 17:03:28 +00001730 if isinstance(data, str):
1731 data = data.encode("utf-8")
Just van Rossumb083cb32002-12-12 12:23:32 +00001732 if not isinstance(zinfo_or_arcname, ZipInfo):
1733 zinfo = ZipInfo(filename=zinfo_or_arcname,
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001734 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001735 zinfo.compress_type = self.compression
Bo Baylesce237c72018-01-29 23:54:07 -06001736 zinfo._compresslevel = self.compresslevel
Serhiy Storchaka46a34922014-09-23 22:40:23 +03001737 if zinfo.filename[-1] == '/':
1738 zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x
1739 zinfo.external_attr |= 0x10 # MS-DOS directory flag
1740 else:
1741 zinfo.external_attr = 0o600 << 16 # ?rw-------
Just van Rossumb083cb32002-12-12 12:23:32 +00001742 else:
1743 zinfo = zinfo_or_arcname
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001744
1745 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001746 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001747 "Attempt to write to ZIP archive that was already closed")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001748 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001749 raise ValueError(
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001750 "Can't write to ZIP archive while an open writing handle exists."
1751 )
1752
1753 if compress_type is not None:
1754 zinfo.compress_type = compress_type
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001755
Bo Baylesce237c72018-01-29 23:54:07 -06001756 if compresslevel is not None:
1757 zinfo._compresslevel = compresslevel
1758
Guido van Rossum85825dc2007-08-27 17:03:28 +00001759 zinfo.file_size = len(data) # Uncompressed size
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001760 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001761 with self.open(zinfo, mode='w') as dest:
1762 dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001763
1764 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001765 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001766 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001767
1768 def close(self):
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001769 """Close the file, and for mode 'w', 'x' and 'a' write the ending
Fred Drake484d7352000-10-02 21:14:52 +00001770 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001771 if self.fp is None:
1772 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001773
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001774 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001775 raise ValueError("Can't close the ZIP file while there is "
1776 "an open writing handle on it. "
1777 "Close the writing handle before closing the zip.")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001778
Antoine Pitrou17babc52012-11-17 23:50:08 +01001779 try:
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001780 if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001781 with self._lock:
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001782 if self._seekable:
Serhiy Storchakaa14f7d22015-01-26 14:01:27 +02001783 self.fp.seek(self.start_dir)
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001784 self._write_end_record()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001785 finally:
1786 fp = self.fp
1787 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001788 self._fpclose(fp)
1789
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001790 def _write_end_record(self):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001791 for zinfo in self.filelist: # write central directory
1792 dt = zinfo.date_time
1793 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1794 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1795 extra = []
1796 if zinfo.file_size > ZIP64_LIMIT \
1797 or zinfo.compress_size > ZIP64_LIMIT:
1798 extra.append(zinfo.file_size)
1799 extra.append(zinfo.compress_size)
1800 file_size = 0xffffffff
1801 compress_size = 0xffffffff
1802 else:
1803 file_size = zinfo.file_size
1804 compress_size = zinfo.compress_size
1805
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001806 if zinfo.header_offset > ZIP64_LIMIT:
1807 extra.append(zinfo.header_offset)
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001808 header_offset = 0xffffffff
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001809 else:
1810 header_offset = zinfo.header_offset
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001811
1812 extra_data = zinfo.extra
1813 min_version = 0
1814 if extra:
1815 # Append a ZIP64 field to the extra's
1816 extra_data = struct.pack(
1817 '<HH' + 'Q'*len(extra),
1818 1, 8*len(extra), *extra) + extra_data
1819
1820 min_version = ZIP64_VERSION
1821
1822 if zinfo.compress_type == ZIP_BZIP2:
1823 min_version = max(BZIP2_VERSION, min_version)
1824 elif zinfo.compress_type == ZIP_LZMA:
1825 min_version = max(LZMA_VERSION, min_version)
1826
1827 extract_version = max(min_version, zinfo.extract_version)
1828 create_version = max(min_version, zinfo.create_version)
1829 try:
1830 filename, flag_bits = zinfo._encodeFilenameFlags()
1831 centdir = struct.pack(structCentralDir,
1832 stringCentralDir, create_version,
1833 zinfo.create_system, extract_version, zinfo.reserved,
1834 flag_bits, zinfo.compress_type, dostime, dosdate,
1835 zinfo.CRC, compress_size, file_size,
1836 len(filename), len(extra_data), len(zinfo.comment),
1837 0, zinfo.internal_attr, zinfo.external_attr,
1838 header_offset)
1839 except DeprecationWarning:
1840 print((structCentralDir, stringCentralDir, create_version,
1841 zinfo.create_system, extract_version, zinfo.reserved,
1842 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1843 zinfo.CRC, compress_size, file_size,
1844 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1845 0, zinfo.internal_attr, zinfo.external_attr,
1846 header_offset), file=sys.stderr)
1847 raise
1848 self.fp.write(centdir)
1849 self.fp.write(filename)
1850 self.fp.write(extra_data)
1851 self.fp.write(zinfo.comment)
1852
1853 pos2 = self.fp.tell()
1854 # Write end-of-zip-archive record
1855 centDirCount = len(self.filelist)
1856 centDirSize = pos2 - self.start_dir
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001857 centDirOffset = self.start_dir
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001858 requires_zip64 = None
1859 if centDirCount > ZIP_FILECOUNT_LIMIT:
1860 requires_zip64 = "Files count"
1861 elif centDirOffset > ZIP64_LIMIT:
1862 requires_zip64 = "Central directory offset"
1863 elif centDirSize > ZIP64_LIMIT:
1864 requires_zip64 = "Central directory size"
1865 if requires_zip64:
1866 # Need to write the ZIP64 end-of-archive records
1867 if not self._allowZip64:
1868 raise LargeZipFile(requires_zip64 +
1869 " would require ZIP64 extensions")
1870 zip64endrec = struct.pack(
1871 structEndArchive64, stringEndArchive64,
1872 44, 45, 45, 0, 0, centDirCount, centDirCount,
1873 centDirSize, centDirOffset)
1874 self.fp.write(zip64endrec)
1875
1876 zip64locrec = struct.pack(
1877 structEndArchive64Locator,
1878 stringEndArchive64Locator, 0, pos2, 1)
1879 self.fp.write(zip64locrec)
1880 centDirCount = min(centDirCount, 0xFFFF)
1881 centDirSize = min(centDirSize, 0xFFFFFFFF)
1882 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1883
1884 endrec = struct.pack(structEndArchive, stringEndArchive,
1885 0, 0, centDirCount, centDirCount,
1886 centDirSize, centDirOffset, len(self._comment))
1887 self.fp.write(endrec)
1888 self.fp.write(self._comment)
1889 self.fp.flush()
1890
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001891 def _fpclose(self, fp):
1892 assert self._fileRefCnt > 0
1893 self._fileRefCnt -= 1
1894 if not self._fileRefCnt and not self._filePassed:
1895 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001896
1897
1898class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001899 """Class to create ZIP archives with Python library files and packages."""
1900
Georg Brandl8334fd92010-12-04 10:26:46 +00001901 def __init__(self, file, mode="r", compression=ZIP_STORED,
Serhiy Storchaka235c5e02013-11-23 15:55:38 +02001902 allowZip64=True, optimize=-1):
Georg Brandl8334fd92010-12-04 10:26:46 +00001903 ZipFile.__init__(self, file, mode=mode, compression=compression,
1904 allowZip64=allowZip64)
1905 self._optimize = optimize
1906
Christian Tismer59202e52013-10-21 03:59:23 +02001907 def writepy(self, pathname, basename="", filterfunc=None):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001908 """Add all files from "pathname" to the ZIP archive.
1909
Fred Drake484d7352000-10-02 21:14:52 +00001910 If pathname is a package directory, search the directory and
1911 all package subdirectories recursively for all *.py and enter
1912 the modules into the archive. If pathname is a plain
1913 directory, listdir *.py and enter all modules. Else, pathname
1914 must be a Python *.py file and the module will be put into the
Brett Cannonf299abd2015-04-13 14:21:02 -04001915 archive. Added modules are always module.pyc.
Fred Drake484d7352000-10-02 21:14:52 +00001916 This method will compile the module.py into module.pyc if
1917 necessary.
Christian Tismer59202e52013-10-21 03:59:23 +02001918 If filterfunc(pathname) is given, it is called with every argument.
1919 When it is False, the file or directory is skipped.
Fred Drake484d7352000-10-02 21:14:52 +00001920 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001921 pathname = os.fspath(pathname)
Christian Tismer59202e52013-10-21 03:59:23 +02001922 if filterfunc and not filterfunc(pathname):
1923 if self.debug:
Christian Tismer410d9312013-10-22 04:09:28 +02001924 label = 'path' if os.path.isdir(pathname) else 'file'
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001925 print('%s %r skipped by filterfunc' % (label, pathname))
Christian Tismer59202e52013-10-21 03:59:23 +02001926 return
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001927 dir, name = os.path.split(pathname)
1928 if os.path.isdir(pathname):
1929 initname = os.path.join(pathname, "__init__.py")
1930 if os.path.isfile(initname):
1931 # This is a package directory, add it
1932 if basename:
1933 basename = "%s/%s" % (basename, name)
1934 else:
1935 basename = name
1936 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001937 print("Adding package in", pathname, "as", basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001938 fname, arcname = self._get_codename(initname[0:-3], basename)
1939 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001940 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001941 self.write(fname, arcname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01001942 dirlist = sorted(os.listdir(pathname))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001943 dirlist.remove("__init__.py")
1944 # Add all *.py files and package subdirectories
1945 for filename in dirlist:
1946 path = os.path.join(pathname, filename)
1947 root, ext = os.path.splitext(filename)
1948 if os.path.isdir(path):
1949 if os.path.isfile(os.path.join(path, "__init__.py")):
1950 # This is a package directory, add it
Christian Tismer59202e52013-10-21 03:59:23 +02001951 self.writepy(path, basename,
1952 filterfunc=filterfunc) # Recursive call
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001953 elif ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02001954 if filterfunc and not filterfunc(path):
1955 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001956 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02001957 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001958 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02001959 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001960 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001961 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001962 self.write(fname, arcname)
1963 else:
1964 # This is NOT a package directory, add its files at top level
1965 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001966 print("Adding files from directory", pathname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01001967 for filename in sorted(os.listdir(pathname)):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001968 path = os.path.join(pathname, filename)
1969 root, ext = os.path.splitext(filename)
1970 if ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02001971 if filterfunc and not filterfunc(path):
1972 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001973 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02001974 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001975 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02001976 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001977 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001978 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001979 self.write(fname, arcname)
1980 else:
1981 if pathname[-3:] != ".py":
Collin Winterce36ad82007-08-30 01:19:48 +00001982 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02001983 'Files added with writepy() must end with ".py"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001984 fname, arcname = self._get_codename(pathname[0:-3], basename)
1985 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001986 print("Adding file", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001987 self.write(fname, arcname)
1988
1989 def _get_codename(self, pathname, basename):
1990 """Return (filename, archivename) for the path.
1991
Fred Drake484d7352000-10-02 21:14:52 +00001992 Given a module name path, return the correct file path and
1993 archive name, compiling if necessary. For example, given
1994 /python/lib/string, return (/python/lib/string.pyc, string).
1995 """
Georg Brandl8334fd92010-12-04 10:26:46 +00001996 def _compile(file, optimize=-1):
1997 import py_compile
1998 if self.debug:
1999 print("Compiling", file)
2000 try:
2001 py_compile.compile(file, doraise=True, optimize=optimize)
Serhiy Storchaka45c43752013-01-29 20:10:28 +02002002 except py_compile.PyCompileError as err:
Georg Brandl8334fd92010-12-04 10:26:46 +00002003 print(err.msg)
2004 return False
2005 return True
2006
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002007 file_py = pathname + ".py"
2008 file_pyc = pathname + ".pyc"
Brett Cannonf299abd2015-04-13 14:21:02 -04002009 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2010 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2011 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
Georg Brandl8334fd92010-12-04 10:26:46 +00002012 if self._optimize == -1:
2013 # legacy mode: use whatever file is present
Brett Cannonf299abd2015-04-13 14:21:02 -04002014 if (os.path.isfile(file_pyc) and
Georg Brandl8334fd92010-12-04 10:26:46 +00002015 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2016 # Use .pyc file.
2017 arcname = fname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002018 elif (os.path.isfile(pycache_opt0) and
2019 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
Georg Brandl8334fd92010-12-04 10:26:46 +00002020 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2021 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002022 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002023 arcname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002024 elif (os.path.isfile(pycache_opt1) and
2025 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2026 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002027 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002028 fname = pycache_opt1
2029 arcname = file_pyc
2030 elif (os.path.isfile(pycache_opt2) and
2031 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2032 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2033 # file name in the archive.
2034 fname = pycache_opt2
2035 arcname = file_pyc
Barry Warsaw28a691b2010-04-17 00:19:56 +00002036 else:
Georg Brandl8334fd92010-12-04 10:26:46 +00002037 # Compile py into PEP 3147 pyc file.
2038 if _compile(file_py):
Brett Cannonf299abd2015-04-13 14:21:02 -04002039 if sys.flags.optimize == 0:
2040 fname = pycache_opt0
2041 elif sys.flags.optimize == 1:
2042 fname = pycache_opt1
2043 else:
2044 fname = pycache_opt2
2045 arcname = file_pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002046 else:
2047 fname = arcname = file_py
2048 else:
2049 # new mode: use given optimization level
2050 if self._optimize == 0:
Brett Cannonf299abd2015-04-13 14:21:02 -04002051 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002052 arcname = file_pyc
2053 else:
Brett Cannonf299abd2015-04-13 14:21:02 -04002054 arcname = file_pyc
2055 if self._optimize == 1:
2056 fname = pycache_opt1
2057 elif self._optimize == 2:
2058 fname = pycache_opt2
2059 else:
2060 msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2061 raise ValueError(msg)
Georg Brandl8334fd92010-12-04 10:26:46 +00002062 if not (os.path.isfile(fname) and
2063 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2064 if not _compile(file_py, optimize=self._optimize):
2065 fname = arcname = file_py
Barry Warsaw28a691b2010-04-17 00:19:56 +00002066 archivename = os.path.split(arcname)[1]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002067 if basename:
2068 archivename = "%s/%s" % (basename, archivename)
2069 return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002070
2071
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002072def main(args=None):
2073 import argparse
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002074
Serhiy Storchaka150cd192017-04-07 18:56:12 +03002075 description = 'A simple command-line interface for zipfile module.'
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002076 parser = argparse.ArgumentParser(description=description)
Serhiy Storchaka150cd192017-04-07 18:56:12 +03002077 group = parser.add_mutually_exclusive_group(required=True)
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002078 group.add_argument('-l', '--list', metavar='<zipfile>',
2079 help='Show listing of a zipfile')
2080 group.add_argument('-e', '--extract', nargs=2,
2081 metavar=('<zipfile>', '<output_dir>'),
2082 help='Extract zipfile into target dir')
2083 group.add_argument('-c', '--create', nargs='+',
2084 metavar=('<name>', '<file>'),
2085 help='Create zipfile from sources')
2086 group.add_argument('-t', '--test', metavar='<zipfile>',
2087 help='Test if a zipfile is valid')
2088 args = parser.parse_args(args)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002089
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002090 if args.test is not None:
2091 src = args.test
2092 with ZipFile(src, 'r') as zf:
Antoine Pitrou17babc52012-11-17 23:50:08 +01002093 badfile = zf.testzip()
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +00002094 if badfile:
2095 print("The following enclosed file is corrupted: {!r}".format(badfile))
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002096 print("Done testing")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002097
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002098 elif args.list is not None:
2099 src = args.list
2100 with ZipFile(src, 'r') as zf:
2101 zf.printdir()
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002102
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002103 elif args.extract is not None:
2104 src, curdir = args.extract
2105 with ZipFile(src, 'r') as zf:
2106 zf.extractall(curdir)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002107
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002108 elif args.create is not None:
2109 zip_name = args.create.pop(0)
2110 files = args.create
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002111
2112 def addToZip(zf, path, zippath):
2113 if os.path.isfile(path):
2114 zf.write(path, zippath, ZIP_DEFLATED)
2115 elif os.path.isdir(path):
Serhiy Storchaka518e71b2014-10-04 13:39:34 +03002116 if zippath:
2117 zf.write(path, zippath)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01002118 for nm in sorted(os.listdir(path)):
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002119 addToZip(zf,
Christian Tismer59202e52013-10-21 03:59:23 +02002120 os.path.join(path, nm), os.path.join(zippath, nm))
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002121 # else: ignore
2122
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002123 with ZipFile(zip_name, 'w') as zf:
2124 for path in files:
Serhiy Storchaka518e71b2014-10-04 13:39:34 +03002125 zippath = os.path.basename(path)
2126 if not zippath:
2127 zippath = os.path.basename(os.path.dirname(path))
2128 if zippath in ('', os.curdir, os.pardir):
2129 zippath = ''
2130 addToZip(zf, path, zippath)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002131
2132if __name__ == "__main__":
2133 main()