blob: f9db45f58a2bde79d82aa722353a6f01a88fd985 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Brett Cannonb57a0852013-06-15 17:32:30 -04008import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import sys
10import time
11import stat
12import shutil
13import struct
14import binascii
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020015import threading
Guido van Rossum32abe6f2000-03-31 17:30:02 +000016
17try:
Tim Peterse1190062001-01-15 03:34:38 +000018 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000019 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040020except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000021 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000022 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000023
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020024try:
25 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040026except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020027 bz2 = None
28
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020029try:
30 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040031except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020032 lzma = None
33
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020034__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020035 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Georg Brandl4d540882010-10-28 06:42:33 +000036 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000037
class BadZipFile(Exception):
    """Raised for ZIP archives that are corrupt or use unsupported features."""


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


# Pre-3.2 compatibility names
BadZipfile = BadZipFile
error = BadZipFile
49
Guido van Rossum32abe6f2000-03-31 17:30:02 +000050
# size and count limits
ZIP64_LIMIT = 0x7FFFFFFF
ZIP_FILECOUNT_LIMIT = 0xFFFF
ZIP_MAX_COMMENT = 0xFFFF

# constants for Zip file compression methods
ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2, ZIP_LZMA = 0, 8, 12, 14
# Other ZIP compression methods not supported

# "version needed to extract" values
DEFAULT_VERSION, ZIP64_VERSION, BZIP2_VERSION, LZMA_VERSION = 20, 45, 46, 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020068
# The header/record layouts below are expressed as struct-module format
# strings.  The names and structures of headers/records are those used in
# the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure (section V.I in the format
# document): magic number, struct format and packed size.
stringEndArchive = b"PK\005\006"
structEndArchive = b"<4s4H2LH"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Indices into the unpacked "end of central directory" record
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE) = range(8)
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9
93
# The "central directory" structure (section V.F in the format document):
# magic number, struct format and packed size.
stringCentralDir = b"PK\001\002"
structCentralDir = "<4s4B4HL2L5H2L"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)
120
# The "local file header" structure (section V.A in the format document):
# magic number, struct format and packed size.
stringFileHeader = b"PK\003\004"
structFileHeader = "<4s2B4HL2L2H"
sizeFileHeader = struct.calcsize(structFileHeader)

# Indices into the unpacked local file header
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
139
# The "Zip64 end of central directory locator" structure: magic number,
# struct format and packed size.
stringEndArchive64Locator = b"PK\x06\x07"
structEndArchive64Locator = "<4sLQL"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record (section V.G in the format
# document): magic number, struct format and packed size.
stringEndArchive64 = b"PK\x06\x06"
structEndArchive64 = "<4sQ2H2L4Q"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Indices into the unpacked "Zip64 end of central directory" record
(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
161
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An OSError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        endrec = _EndRecData(fp)
    except OSError:
        return False
    # A truthy record means the file has the correct magic number.
    return True if endrec else False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000169
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    Any OSError raised while opening or probing the file yields False.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and make sure it gets closed again.
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            # Already a file-like object: probe it directly.
            verdict = _check_zipfile(fp=filename)
    except OSError:
        pass
    return verdict
185
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin is a seekable binary file object, offset is the position of the
    classic "end of central directory" record relative to the end of the
    file, and endrec is the list built by _EndRecData().  When no ZIP64
    locator/record is found, endrec is returned unchanged; otherwise it is
    updated in place with the ZIP64 values and returned.

    Raises BadZipFile if the locator describes a multi-disk archive.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, _reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    # Some writers record a total disk count of 0 for single-disk archives,
    # so only a count above 1 really indicates a spanned archive.
    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    (sig, _sz, _create_version, _read_version, disk_num, disk_dir,
     dircount, dircount2, dirsize, diroffset) = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
227
228
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list holding the unpacked fields of the ZIP "End of
    central dir" record, followed by the archive comment and finally the
    file offset at which the record starts."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: an archive without a comment ends with the fixed-size
    # "end of central directory" structure.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    no_comment = (len(tail) == sizeEndCentDir
                  and tail[0:4] == stringEndArchive
                  and tail[-2:] == b"\000\000")
    if no_comment:
        # Signature matches and the comment length is zero: unpack the
        # structure, then append a blank comment and the record offset.
        endrec = [*struct.unpack(structEndArchive, tail),
                  b"", filesize - sizeEndCentDir]

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  The comment is the last item in the file and may be up to
    # 64K long, so search that trailing window for the record signature.
    # It is assumed that the magic number does not appear in the comment.
    window_start = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(window_start, 0)
    window = fpin.read()
    sig_pos = window.rfind(stringEndArchive)
    if sig_pos < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    rec_end = sig_pos + sizeEndCentDir
    raw_record = window[sig_pos:rec_end]
    if len(raw_record) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, raw_record))
    comment_size = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    endrec.append(window[rec_end:rec_end + comment_size])
    record_offset = window_start + sig_pos
    endrec.append(record_offset)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, record_offset - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000288
Fred Drake484d7352000-10-02 21:14:52 +0000289
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named *filename* stamped with *date_time*.

        date_time is a (year, month, day, hour, min, sec) sequence.
        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # High 16 bits carry the file mode, low 16 bits the remaining
        # external attributes.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        zip64 selects whether a ZIP64 extra field is appended: None decides
        from the recorded sizes, True always appends it, and False raises
        LargeZipFile when a size exceeds ZIP64_LIMIT.

        As a side effect, extract_version and create_version are raised to
        the minimum version required by the features in use.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        ASCII is used when possible; otherwise the name is encoded as UTF-8
        and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Decode the extra field, applying ZIP64 (0x0001) values in place.

        file_size, compress_size and header_offset are replaced by the
        64-bit values from the extra field when they hold the "see ZIP64"
        sentinel.  Raises BadZipFile if the extra field is malformed or
        holds fewer values than the sentinels require.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                # ZIP64 extension (large files and/or large archives).
                # Values are consumed in order, one per sentinel field; a
                # record that runs out of values is corrupt.
                try:
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        self.file_size = counts[idx]
                        idx += 1

                    if self.compress_size == 0xFFFFFFFF:
                        self.compress_size = counts[idx]
                        idx += 1

                    if self.header_offset == 0xffffffff:
                        self.header_offset = counts[idx]
                        idx += 1
                except IndexError:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" %
                                     (tp, ln)) from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        # endswith() also copes with an empty name, which indexing would not.
        return self.filename.endswith('/')
508
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000509
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300510# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
511# internal keys. We noticed that a direct implementation is faster than
512# relying on binascii.crc32().
Thomas Wouterscf297e42007-02-23 15:07:44 +0000513
# Lookup table of 256 CRC values; built on first use by _ZipDecrypter().
_crctable = None


def _gen_crc(crc):
    """Return *crc* after eight shift/XOR rounds with 0xEDB88320."""
    for _ in range(8):
        low_bit = crc & 1
        crc >>= 1
        if low_bit:
            crc ^= 0xEDB88320
    return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000522
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300523# ZIP supports a password-based form of encryption. Even though known
524# plaintext attacks have been found against it, it is still useful
525# to be able to get data out of such a file.
526#
527# Usage:
528# zd = _ZipDecrypter(mypwd)
529# plain_bytes = zd(cypher_bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000530
def _ZipDecrypter(pwd):
    """Return a function that decrypts successive chunks of bytes.

    *pwd* is iterated byte by byte to seed the three internal keys; the
    returned callable keeps updating those keys as it decrypts, so chunks
    must be passed in stream order.
    """
    key0 = 0x12345678
    key1 = 0x23456789
    key2 = 0x34567890

    # Build the shared CRC table lazily, the first time it is needed.
    global _crctable
    if _crctable is None:
        _crctable = [_gen_crc(i) for i in range(256)]
    table = _crctable

    def update_keys(c):
        nonlocal key0, key1, key2
        # CRC32 primitive on one byte, applied to key0.
        key0 = (key0 >> 8) ^ table[(key0 ^ c) & 0xFF]
        key1 = ((key1 + (key0 & 0xFF)) * 0x08088405 + 1) & 0xFFFFFFFF
        # Same primitive, feeding the top byte of key1 into key2.
        key2 = (key2 >> 8) ^ table[(key2 ^ (key1 >> 24)) & 0xFF]

    for pw_byte in pwd:
        update_keys(pw_byte)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        for byte in data:
            k = key2 | 2
            byte ^= ((k * (k ^ 1)) >> 8) & 0xFF
            update_keys(byte)
            plain.append(byte)
        return bytes(plain)

    return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000567
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200568
class LZMACompressor:
    """Raw LZMA1 compressor that prefixes its output with a small header.

    The header (two fixed bytes, the property length, then the encoded
    filter properties) is emitted together with the first output.
    """

    def __init__(self):
        # The underlying compressor is created lazily by _init().
        self._comp = None

    def _init(self):
        """Create the raw compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*, emitting the header first if not yet written."""
        header = b'' if self._comp is not None else self._init()
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream, emitting the header first if not yet written."""
        header = b'' if self._comp is not None else self._init()
        return header + self._comp.flush()
590
591
class LZMADecompressor:
    """Raw LZMA1 decompressor that first parses the header carrying the
    filter properties, buffering input until the header is complete."""

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Return decompressed bytes for *data* (b'' while still buffering)."""
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            # Bytes 2-3 hold the little-endian length of the properties.
            psize = int.from_bytes(pending[2:4], 'little')
            body_start = 4 + psize
            if len(pending) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[body_start:]
            # The header buffer is no longer needed once parsing is done.
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
618
619
# Display names for ZIP compression method numbers.
compressor_names = {
    0: 'store',
    1: 'shrink',
    **dict.fromkeys(range(2, 6), 'reduce'),
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
639
def _check_compression(compression):
    """Validate that *compression* is a usable compression method.

    Raises RuntimeError when the module backing the method is missing and
    NotImplementedError for any method other than the four handled here.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        module = zlib
        message = "Compression requires the (missing) zlib module"
    elif compression == ZIP_BZIP2:
        module = bz2
        message = "Compression requires the (missing) bz2 module"
    elif compression == ZIP_LZMA:
        module = lzma
        message = "Compression requires the (missing) lzma module"
    else:
        raise NotImplementedError("That compression method is not supported")
    if not module:
        raise RuntimeError(message)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200657
658
def _get_compressor(compress_type, compresslevel=None):
    """Return a new compressor object for *compress_type*, or None.

    compresslevel, when given, is passed to the zlib or bz2 compressor;
    it is ignored for ZIP_LZMA.  None is returned for any other type.
    """
    if compress_type == ZIP_DEFLATED:
        if compresslevel is None:
            level = zlib.Z_DEFAULT_COMPRESSION
        else:
            level = compresslevel
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        level_args = () if compresslevel is None else (compresslevel,)
        return bz2.BZ2Compressor(*level_args)
    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()
    return None
673
674
def _get_decompressor(compress_type):
    """Return a fresh decompressor object for *compress_type*.

    ZIP_STORED needs no decompressor, so None is returned for it.  An
    unsupported type raises NotImplementedError; the message includes the
    method's name when compressor_names knows it.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # wbits=-15: the archive holds a raw deflate stream.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    method_name = compressor_names.get(compress_type)
    if not method_name:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError("compression type %d (%s)" % (compress_type, method_name))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200690
691
class _SharedFile:
    """Read-only view of a shared file object with its own read position.

    Each read re-seeks the underlying file to this view's saved position
    while holding *lock*.  *writing* is a callable; reads are refused while
    it returns a true value.  *close* is called with the underlying file the
    first time close() is invoked.
    """

    def __init__(self, file, pos, close, lock, writing):
        self._file = file
        self._pos = pos
        self._close = close
        self._lock = lock
        self._writing = writing

    def read(self, n=-1):
        """Read up to *n* bytes starting at this view's saved position."""
        with self._lock:
            if self._writing():
                raise ValueError("Can't read from the ZIP file while there "
                        "is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            underlying = self._file
            underlying.seek(self._pos)
            chunk = underlying.read(n)
            # Remember where this view stopped for its next read.
            self._pos = underlying.tell()
            return chunk

    def close(self):
        """Release the underlying file once; later calls do nothing."""
        underlying = self._file
        if underlying is None:
            return
        # Drop our reference before invoking the callback.
        self._file = None
        self._close(underlying)
716
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200717# Provide the tell method for unseekable stream
class _Tellable:
    """Wrap a write-only stream and track how much has been written.

    tell() reports the running total of the values returned by the wrapped
    stream's write(); flush() and close() are forwarded unchanged.
    """

    def __init__(self, fp):
        self.fp = fp
        self.offset = 0

    def write(self, data):
        """Write *data* to the wrapped stream and advance the offset."""
        written = self.fp.write(data)
        self.offset += written
        return written

    def tell(self):
        """Return the accumulated offset."""
        return self.offset

    def flush(self):
        """Flush the wrapped stream."""
        self.fp.flush()

    def close(self):
        """Close the wrapped stream."""
        self.fp.close()
736
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200737
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # Parenthesized on purpose: "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Set up reading of one member.

        fileobj: object whose read() yields the member's raw bytes.
        mode: stored as-is on self.mode.
        zipinfo: supplies compress_type, compress_size, file_size, filename
            and, optionally, CRC.
        decrypter: optional callable applied to every raw chunk read.
        close_fileobj: when true, close() also closes fileobj.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            # No reference value: CRC checking is skipped entirely.
            self._expected_crc = None

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Push the bytes just read back in front of the buffer so the
            # logical position is unchanged.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Fully satisfied from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than requested: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n=-1):
        """Read up to n bytes with at most one read() system call.

        If n is omitted, None, or negative, return whatever is buffered plus
        the first non-empty chunk obtained from the underlying file.
        """

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the recorded uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read one raw chunk (at least MIN_READ_SIZE, at most what is left of
        # the member) from the underlying file and decrypt it if needed.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            # Underlying file ended before the member did.
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000965
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000966
class _ZipWriteFile(io.BufferedIOBase):
    """Writable file-like object for adding one member to an archive.

    Data passed to write() is checksummed, compressed when a compressor is
    configured, and written straight to the owning ZipFile's file object.
    close() records the final CRC and sizes on the ZipInfo, writes them to
    the archive and registers the member with the ZipFile.
    """

    def __init__(self, zf, zinfo, zip64):
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0
        self._compress_size = 0
        self._crc = 0

    @property
    def _fileobj(self):
        # Looked up on every access rather than cached at construction.
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Write *data* (any bytes-like object); return the bytes consumed."""
        if self.closed:
            raise ValueError('I/O operation on closed file.')

        # Accept any object supporting the buffer protocol.  len() counts
        # items, not bytes, for e.g. array.array, so measure via memoryview.
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes
        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        """Finish the member; always release the ZipFile's writing flag."""
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                fmt = '<LQQ' if self._zip64 else '<LLL'
                self._fileobj.write(struct.pack(fmt, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError('File size unexpectedly exceeded ZIP64 '
                                           'limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError('Compressed size unexpectedly exceeded '
                                           'ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            # Reset even on failure; otherwise the archive stays locked as
            # "being written" after an error.
            self._zipfile._writing = False
1041
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001042class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001043 """ Class with methods to open, read, write, close, list zip files.
1044
Bo Baylesce237c72018-01-29 23:54:07 -06001045 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1046 compresslevel=None)
Tim Petersa19a1682001-03-29 04:36:09 +00001047
Fred Drake3d9091e2001-03-26 15:49:24 +00001048 file: Either the path to the file, or a file-like object.
1049 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001050 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1051 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001052 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1053 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001054 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1055 needed, otherwise it will raise an exception when this would
1056 be necessary.
Bo Baylesce237c72018-01-29 23:54:07 -06001057 compresslevel: None (default for the given compression type) or an integer
1058 specifying the level to pass to the compressor.
1059 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1060 When using ZIP_DEFLATED integers 0 through 9 are accepted.
1061 When using ZIP_BZIP2 integers 1 through 9 are accepted.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001062
Fred Drake3d9091e2001-03-26 15:49:24 +00001063 """
Fred Drake484d7352000-10-02 21:14:52 +00001064
Fred Drake90eac282001-02-28 05:29:34 +00001065 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001066 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001067
    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'.

        file is a path (str or os.PathLike), which is opened here, or an
        already-open file-like object, which is used as given.  compression
        is validated with _check_compression(); compresslevel and allowZip64
        are only stored.  Raises ValueError for an unknown mode.  If reading
        the existing archive or positioning the file fails, the file object
        is released through _fpclose() and the exception is re-raised.
        """
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            # Maps the ZipFile mode to the first open() mode to try, and each
            # open() mode to the fallback tried when it raises OSError.
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    if filemode in modeDict:
                        # Retry with the next fallback mode.
                        filemode = modeDict[filemode]
                        continue
                    # No fallback left: report the last failure.
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    # No usable tell(): wrap the stream so offsets can be
                    # tracked, and treat it as unseekable.
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            # Clean up on any failure (the exception is always re-raised):
            # detach the file object before handing it to _fpclose().
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001156
Ezio Melottifaa6b7f2009-12-30 12:34:59 +00001157 def __enter__(self):
1158 return self
1159
1160 def __exit__(self, type, value, traceback):
1161 self.close()
1162
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001163 def __repr__(self):
1164 result = ['<%s.%s' % (self.__class__.__module__,
1165 self.__class__.__qualname__)]
1166 if self.fp is not None:
1167 if self._filePassed:
1168 result.append(' file=%r' % self.fp)
1169 elif self.filename is not None:
1170 result.append(' filename=%r' % self.filename)
1171 result.append(' mode=%r' % self.mode)
1172 else:
1173 result.append(' [closed]')
1174 result.append('>')
1175 return ''.join(result)
1176
Tim Peters7d3bad62001-04-04 18:56:49 +00001177 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +00001178 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001179 fp = self.fp
Georg Brandl268e4d42010-10-14 06:59:45 +00001180 try:
1181 endrec = _EndRecData(fp)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001182 except OSError:
Georg Brandl4d540882010-10-28 06:42:33 +00001183 raise BadZipFile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001184 if not endrec:
Georg Brandl4d540882010-10-28 06:42:33 +00001185 raise BadZipFile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001186 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001187 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001188 size_cd = endrec[_ECD_SIZE] # bytes in central directory
1189 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
R David Murrayf50b38a2012-04-12 18:44:58 -04001190 self._comment = endrec[_ECD_COMMENT] # archive comment
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001191
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001192 # "concat" is zero, unless zip was concatenated to another file
1193 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +00001194 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1195 # If Zip64 extension structures are present, account for them
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001196 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001197
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001198 if self.debug > 2:
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001199 inferred = concat + offset_cd
1200 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001201 # self.start_dir: Position of start of central directory
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001202 self.start_dir = offset_cd + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001203 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001204 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001205 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001206 total = 0
1207 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001208 centdir = fp.read(sizeCentralDir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001209 if len(centdir) != sizeCentralDir:
1210 raise BadZipFile("Truncated central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001211 centdir = struct.unpack(structCentralDir, centdir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001212 if centdir[_CD_SIGNATURE] != stringCentralDir:
1213 raise BadZipFile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001214 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001215 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +00001216 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001217 flags = centdir[5]
1218 if flags & 0x800:
1219 # UTF-8 file names extension
1220 filename = filename.decode('utf-8')
1221 else:
1222 # Historical ZIP filename encoding
1223 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001224 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001225 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +00001226 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1227 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001228 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001229 (x.create_version, x.create_system, x.extract_version, x.reserved,
Christian Tismer59202e52013-10-21 03:59:23 +02001230 x.flag_bits, x.compress_type, t, d,
1231 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
Martin v. Löwisd099b562012-05-01 14:08:22 +02001232 if x.extract_version > MAX_EXTRACT_VERSION:
1233 raise NotImplementedError("zip file version %.1f" %
1234 (x.extract_version / 10))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001235 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1236 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +00001237 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001238 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Christian Tismer59202e52013-10-21 03:59:23 +02001239 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001240
1241 x._decodeExtra()
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001242 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001243 self.filelist.append(x)
1244 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001245
1246 # update total bytes read from central directory
1247 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1248 + centdir[_CD_EXTRA_FIELD_LENGTH]
1249 + centdir[_CD_COMMENT_LENGTH])
1250
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001251 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001252 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001253
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001254
1255 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001256 """Return a list of file names in the archive."""
Ezio Melotti006917e2012-04-16 21:34:24 -06001257 return [data.filename for data in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001258
1259 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001260 """Return a list of class ZipInfo instances for files in the
1261 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001262 return self.filelist
1263
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001264 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +00001265 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001266 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
1267 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001268 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001269 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001270 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1271 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001272
1273 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001274 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001275 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001276 for zinfo in self.filelist:
1277 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001278 # Read by chunks, to avoid an OverflowError or a
1279 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001280 with self.open(zinfo.filename, "r") as f:
1281 while f.read(chunk_size): # Check CRC-32
1282 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001283 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001284 return zinfo.filename
1285
1286 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +00001287 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001288 info = self.NameToInfo.get(name)
1289 if info is None:
1290 raise KeyError(
1291 'There is no item named %r in the archive' % name)
1292
1293 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001294
Thomas Wouterscf297e42007-02-23 15:07:44 +00001295 def setpassword(self, pwd):
1296 """Set default password for encrypted files."""
R. David Murray8d855d82010-12-21 21:53:37 +00001297 if pwd and not isinstance(pwd, bytes):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001298 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
R. David Murray8d855d82010-12-21 21:53:37 +00001299 if pwd:
1300 self.pwd = pwd
1301 else:
1302 self.pwd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001303
@property
def comment(self):
    """The archive-level comment, as bytes."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Only bytes are accepted; anything else is a caller error.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
    # An over-long comment is not rejected: it is cut down to the maximum
    # length and the caller is warned about the truncation.
    limit = ZIP_MAX_COMMENT
    if len(comment) > limit:
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % limit, stacklevel=2)
        comment = comment[:limit]
    self._comment = comment
    # Flag the archive as changed so close() writes the ending records.
    self._didModify = True
1321
def read(self, name, pwd=None):
    """Return the whole content of member 'name' as bytes.

    'name' and 'pwd' are passed straight on to open() in read mode.
    """
    with self.open(name, "r", pwd) as member:
        payload = member.read()
    return payload
Guido van Rossumd8faa362007-04-27 19:54:29 +00001326
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Return a file-like object for the archive member 'name'.

    name is either the member's file name inside the archive or a ZipInfo
    object.

    mode is 'r' to read a member that is already in the archive, or 'w' to
    write a member that is newly added to it.

    pwd is the password used to decrypt the member; it is only accepted
    when reading and must be bytes.

    When writing a member whose size is not known up front but may exceed
    2 GiB, pass force_zip64=True so the ZIP64 format is used.  If the size
    is known, prefer passing a ZipInfo with file_size already set.
    """
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    if pwd and (mode == "w"):
        raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    writing = (mode == 'w')

    # Resolve 'name' to an info object.
    if isinstance(name, ZipInfo):
        zinfo = name
    elif writing:
        # New member: inherit the archive-wide compression settings.
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
    else:
        zinfo = self.getinfo(name)

    if writing:
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                         "is an open writing handle on it. "
                         "Close the writing handle before trying to read.")

    # Reading: take another reference on the shared underlying file; it is
    # given back through self._fpclose when the returned object is closed.
    self._fileRefCnt += 1
    zef_file = _SharedFile(self.fp, zinfo.header_offset,
                           self._fpclose, self._lock, lambda: self._writing)
    try:
        # Consume and validate the fixed-size local file header.
        raw_header = zef_file.read(sizeFileHeader)
        if len(raw_header) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        header_fields = struct.unpack(structFileHeader, raw_header)
        if header_fields[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        # The variable-length name and extra field follow the header.
        fname = zef_file.read(header_fields[_FH_FILENAME_LENGTH])
        extra_length = header_fields[_FH_EXTRA_FIELD_LENGTH]
        if extra_length:
            zef_file.read(extra_length)

        flags = zinfo.flag_bits
        if flags & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")
        if flags & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        # Flag bit 11 marks a UTF-8 file name; otherwise it is cp437.
        fname_str = fname.decode("utf-8" if flags & 0x800 else "cp437")
        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # Flag bit 0 marks an encrypted member.
        zd = None
        if flags & 0x1:
            pwd = pwd or self.pwd
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The cipher stream starts with a 12-byte encryption header.
            # Its last byte is compared with the MSB of the CRC, or with
            # the MSB of the file time when flag bit 3 is set, to check
            # the password.
            h = zd(zef_file.read(12)[0:12])
            if flags & 0x8:
                # compare against the file time from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file %r" % name)

        return ZipExtFile(zef_file, mode, zinfo, zd, True)
    except BaseException:
        # Whatever went wrong, give the shared file reference back before
        # letting the exception propagate.
        zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001440
def _open_to_write(self, zinfo, force_zip64=False):
    """Write the local header for 'zinfo' and return a writable handle.

    Raises ValueError if force_zip64 is requested on an archive opened
    with allowZip64=False, or if another write handle is still open.
    """
    if force_zip64 and not self._allowZip64:
        raise ValueError(
            "force_zip64 is True, but allowZip64 was False when opening "
            "the ZIP file."
        )
    if self._writing:
        raise ValueError("Can't write to the ZIP file while there is "
                         "another write handle open on it. "
                         "Close the first handle before opening another.")

    # Sizes and CRC are overwritten with correct data after processing
    # the file; a caller-supplied file_size is kept as a size hint.
    if not hasattr(zinfo, 'file_size'):
        zinfo.file_size = 0
    zinfo.compress_size = 0
    zinfo.CRC = 0

    flags = 0x00
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        flags |= 0x02
    if not self._seekable:
        flags |= 0x08
    zinfo.flag_bits = flags

    if not zinfo.external_attr:
        zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

    # Compressed size can be larger than uncompressed size, hence the
    # 5% margin on the size hint.
    zip64 = self._allowZip64 and \
            (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

    # The new member goes where the central directory currently starts.
    if self._seekable:
        self.fp.seek(self.start_dir)
    zinfo.header_offset = self.fp.tell()

    self._writecheck(zinfo)
    self._didModify = True

    self.fp.write(zinfo.FileHeader(zip64))

    self._writing = True
    return _ZipWriteFile(self, zinfo, zip64)
1483
def extract(self, member, path=None, pwd=None):
    """Extract one member from the archive and return the created path.

    `member' may be a file name or a ZipInfo object.  The member is
    extracted under its full name into the current working directory,
    unless `path' names a different directory.  `pwd' is the password
    used for encrypted members.
    """
    destination = os.getcwd() if path is None else os.fspath(path)
    return self._extract_member(member, destination, pwd)
1496
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members (by default all of them) from the archive.

    `path' is the directory to extract into; the current working
    directory is used when it is omitted.  `members' is optional and
    must be a subset of the list returned by namelist().  `pwd' is the
    password used for encrypted members.
    """
    selected = self.namelist() if members is None else members
    destination = os.getcwd() if path is None else os.fspath(path)

    for entry in selected:
        self._extract_member(entry, destination, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001513
@classmethod
def _sanitize_windows_name(cls, arcname, pathsep):
    """Return 'arcname' with characters Windows rejects replaced by '_'.

    Trailing dots are stripped from every path component, and components
    that end up empty are dropped.
    """
    # The translation table is built on first use and cached on the class.
    table = cls._windows_illegal_name_trans_table
    if not table:
        bad_chars = ':<>|"?*'
        table = str.maketrans(bad_chars, '_' * len(bad_chars))
        cls._windows_illegal_name_trans_table = table

    kept = []
    for piece in arcname.translate(table).split(pathsep):
        piece = piece.rstrip('.')
        if piece:
            kept.append(piece)
    return pathsep.join(kept)
1528
def _extract_member(self, member, targetpath, pwd):
    """Extract 'member' below the directory 'targetpath'.

    'member' is a ZipInfo object or a member name (resolved through
    getinfo()).  'pwd' is the password passed to open() for encrypted
    members.  Returns the path of the file or directory that was written.

    The member name is sanitised before it is joined to 'targetpath':
    drive letters / UNC prefixes, empty components, "." and ".." are
    removed, so an absolute or parent-relative name is treated as a
    relative one.
    """
    if not isinstance(member, ZipInfo):
        member = self.getinfo(member)

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    invalid_path_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                               if x not in invalid_path_parts)
    if os.path.sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, os.path.sep)

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.  exist_ok=True closes the
    # window between the exists() test and the creation: another thread
    # or process extracting into the same tree may create the directory
    # in between, which must not be reported as an error.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs, exist_ok=True)

    if member.is_dir():
        if not os.path.isdir(targetpath):
            try:
                os.mkdir(targetpath)
            except FileExistsError:
                # Same race as above: fine if a directory appeared in the
                # meantime, still an error if something else is in the way.
                if not os.path.isdir(targetpath):
                    raise
        return targetpath

    with self.open(member, pwd=pwd) as source, \
            open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1570
def _writecheck(self, zinfo):
    """Validate that 'zinfo' can be written to this archive.

    Warns when the name is already present, raises ValueError when the
    archive is not writable or already closed, and raises LargeZipFile
    when the write would need ZIP64 extensions that are not allowed.
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ('w', 'x', 'a'):
        raise ValueError("write() requires mode 'w', 'x', or 'a'")
    if not self.fp:
        raise ValueError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)

    # The size limits only matter when ZIP64 extensions are not allowed.
    if self._allowZip64:
        return
    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        reason = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        reason = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        reason = "Zipfile size"
    else:
        return
    raise LargeZipFile(reason + " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001593
def write(self, filename, arcname=None,
          compress_type=None, compresslevel=None):
    """Add the file or directory 'filename' to the archive as 'arcname'.

    compress_type and compresslevel override the archive-wide settings
    for this member when given; they are ignored for directories.
    Raises ValueError if the archive is closed or a write handle is
    still open.
    """
    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname)

    if not zinfo.is_dir():
        # Regular file: settle the compression settings, then stream the
        # content through a write handle.
        zinfo.compress_type = (self.compression if compress_type is None
                               else compress_type)
        zinfo._compresslevel = (self.compresslevel if compresslevel is None
                                else compresslevel)
        with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
            shutil.copyfileobj(src, dest, 1024*8)
        return

    # Directory entry: there is no content, only a local header.
    zinfo.compress_size = 0
    zinfo.CRC = 0
    with self._lock:
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        # The central directory will start right after this header.
        self.start_dir = self.fp.tell()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001641
def writestr(self, zinfo_or_arcname, data,
             compress_type=None, compresslevel=None):
    """Write 'data' into the archive as a single member.

    'data' may be a 'str' or a 'bytes' instance; a 'str' is encoded as
    UTF-8 first.  'zinfo_or_arcname' is either a ZipInfo instance or the
    name of the member in the archive.  When a name is given, the member
    gets the current local time, the archive-wide compression settings
    and default permissions (directory permissions if the name ends with
    '/').

    compress_type and compresslevel, when given, override the settings
    of the ZipInfo (or the archive-wide defaults).  Raises ValueError if
    the archive is closed or a write handle is still open.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
        # endswith() rather than filename[-1]: indexing raised an
        # unrelated IndexError when the name was empty.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    if compresslevel is not None:
        zinfo._compresslevel = compresslevel

    zinfo.file_size = len(data)            # Uncompressed size
    with self._lock:
        with self.open(zinfo, mode='w') as dest:
            dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001682
def __del__(self):
    """Finalizer: run close() in case the caller never did."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001686
def close(self):
    """Close the archive; in mode 'w', 'x' or 'a' also write the ending
    records if the archive was modified.

    Does nothing when the archive is already closed.  Raises ValueError
    while a write handle is still open.
    """
    if self.fp is None:
        return

    if self._writing:
        raise ValueError("Can't close the ZIP file while there is "
                         "an open writing handle on it. "
                         "Close the writing handle before closing the zip.")

    try:
        needs_end_record = self.mode in ('w', 'x', 'a') and self._didModify
        if needs_end_record:
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        # Release the file even if writing the ending records failed.
        handle, self.fp = self.fp, None
        self._fpclose(handle)
1708
def _write_end_record(self):
    """Write the central directory and the end-of-archive records.

    One central directory entry is written per member of self.filelist,
    followed by the ZIP64 end records when a limit is exceeded, the
    regular end-of-archive record and the archive comment.  Raises
    LargeZipFile when ZIP64 records are needed but not allowed.
    """
    for zinfo in self.filelist:         # write central directory
        dt = zinfo.date_time
        dosdate = ((dt[0] - 1980) << 9) | (dt[1] << 5) | dt[2]
        dostime = (dt[3] << 11) | (dt[4] << 5) | (dt[5] // 2)

        # Values too large for their 32-bit slot are moved to a ZIP64
        # extra field and the slot is filled with 0xffffffff.
        zip64_values = []
        if zinfo.file_size > ZIP64_LIMIT \
           or zinfo.compress_size > ZIP64_LIMIT:
            zip64_values.append(zinfo.file_size)
            zip64_values.append(zinfo.compress_size)
            file_size = compress_size = 0xffffffff
        else:
            file_size = zinfo.file_size
            compress_size = zinfo.compress_size

        if zinfo.header_offset > ZIP64_LIMIT:
            zip64_values.append(zinfo.header_offset)
            header_offset = 0xffffffff
        else:
            header_offset = zinfo.header_offset

        extra_data = zinfo.extra
        min_version = 0
        if zip64_values:
            # Put a ZIP64 field (header id 1) in front of the member's
            # own extra data.
            count = len(zip64_values)
            extra_data = struct.pack(
                '<HH' + 'Q'*count,
                1, 8*count, *zip64_values) + extra_data

            min_version = ZIP64_VERSION

        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
        except DeprecationWarning:
            # Dump the offending field values to stderr before re-raising.
            print((structCentralDir, stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset), file=sys.stderr)
            raise
        for chunk in (centdir, filename, extra_data, zinfo.comment):
            self.fp.write(chunk)

    pos2 = self.fp.tell()
    # Write end-of-zip-archive record
    centDirCount = len(self.filelist)
    centDirSize = pos2 - self.start_dir
    centDirOffset = self.start_dir

    if centDirCount > ZIP_FILECOUNT_LIMIT:
        requires_zip64 = "Files count"
    elif centDirOffset > ZIP64_LIMIT:
        requires_zip64 = "Central directory offset"
    elif centDirSize > ZIP64_LIMIT:
        requires_zip64 = "Central directory size"
    else:
        requires_zip64 = None

    if requires_zip64:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
        self.fp.write(struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, centDirCount, centDirCount,
            centDirSize, centDirOffset))

        self.fp.write(struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, pos2, 1))

        # The regular end record below only gets the clamped values.
        centDirCount = min(centDirCount, 0xFFFF)
        centDirSize = min(centDirSize, 0xFFFFFFFF)
        centDirOffset = min(centDirOffset, 0xFFFFFFFF)

    endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, centDirCount, centDirCount,
                         centDirSize, centDirOffset, len(self._comment))
    self.fp.write(endrec)
    self.fp.write(self._comment)
    self.fp.flush()
1809
def _fpclose(self, fp):
    """Drop one reference to the underlying file 'fp'.

    'fp' is closed once the last reference is gone, unless the file
    object was passed in by the caller (self._filePassed).
    """
    assert self._fileRefCnt > 0
    self._fileRefCnt -= 1
    if self._fileRefCnt or self._filePassed:
        return
    fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001815
1816
1817class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001818 """Class to create ZIP archives with Python library files and packages."""
1819
def __init__(self, file, mode="r", compression=ZIP_STORED,
             allowZip64=True, optimize=-1):
    """Open the archive like ZipFile and remember 'optimize'.

    file, mode, compression and allowZip64 are handed to
    ZipFile.__init__ unchanged; 'optimize' is stored in self._optimize.
    """
    base_options = dict(mode=mode, compression=compression,
                        allowZip64=allowZip64)
    ZipFile.__init__(self, file, **base_options)
    self._optimize = optimize
1825
def writepy(self, pathname, basename="", filterfunc=None):
    """Add all files from "pathname" to the ZIP archive.

    If pathname is a package directory, search the directory and
    all package subdirectories recursively for all *.py and enter
    the modules into the archive.  If pathname is a plain
    directory, listdir *.py and enter all modules.  Else, pathname
    must be a Python *.py file and the module will be put into the
    archive.  Added modules are always module.pyc.
    This method will compile the module.py into module.pyc if
    necessary.

    basename is the archive path prefix under which packages are stored.

    If filterfunc is given, it is called with the path of each file or
    directory before it is added; when it returns a false value, that
    file or directory is skipped.

    Directory listings are processed in sorted order, so the order of
    the archive members does not depend on the file system.

    Raises RuntimeError if pathname is a file that does not end with
    ".py".
    """
    pathname = os.fspath(pathname)
    if filterfunc and not filterfunc(pathname):
        if self.debug:
            label = 'path' if os.path.isdir(pathname) else 'file'
            print('%s %r skipped by filterfunc' % (label, pathname))
        return
    name = os.path.basename(pathname)
    if os.path.isdir(pathname):
        initname = os.path.join(pathname, "__init__.py")
        if os.path.isfile(initname):
            # This is a package directory, add it
            if basename:
                basename = "%s/%s" % (basename, name)
            else:
                basename = name
            if self.debug:
                print("Adding package in", pathname, "as", basename)
            fname, arcname = self._get_codename(initname[0:-3], basename)
            if self.debug:
                print("Adding", arcname)
            self.write(fname, arcname)
            # os.listdir() returns entries in arbitrary order; sort them
            # so the archive layout is reproducible.
            dirlist = sorted(os.listdir(pathname))
            dirlist.remove("__init__.py")
            # Add all *.py files and package subdirectories
            for filename in dirlist:
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if os.path.isdir(path):
                    if os.path.isfile(os.path.join(path, "__init__.py")):
                        # This is a package directory, add it
                        self.writepy(path, basename,
                                     filterfunc=filterfunc)  # Recursive call
                elif ext == ".py":
                    if filterfunc and not filterfunc(path):
                        if self.debug:
                            print('file %r skipped by filterfunc' % path)
                        continue
                    fname, arcname = self._get_codename(path[0:-3],
                                                        basename)
                    if self.debug:
                        print("Adding", arcname)
                    self.write(fname, arcname)
        else:
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in sorted(os.listdir(pathname)):
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if ext == ".py":
                    if filterfunc and not filterfunc(path):
                        if self.debug:
                            print('file %r skipped by filterfunc' % path)
                        continue
                    fname, arcname = self._get_codename(path[0:-3],
                                                        basename)
                    if self.debug:
                        print("Adding", arcname)
                    self.write(fname, arcname)
    else:
        if pathname[-3:] != ".py":
            raise RuntimeError(
                'Files added with writepy() must end with ".py"')
        fname, arcname = self._get_codename(pathname[0:-3], basename)
        if self.debug:
            print("Adding file", arcname)
        self.write(fname, arcname)
1907
def _get_codename(self, pathname, basename):
    """Return (filename, archivename) for the module at *pathname*.

    *pathname* is a module path without its ".py" suffix.  The result
    names the file on disk that should be stored and the name it gets
    inside the archive, byte-compiling the source first when no
    up-to-date compiled file exists.  If compilation fails, the ".py"
    source itself is returned instead.  When *basename* is non-empty it
    is prepended to the archive name with a "/" separator.
    """
    def _compile(file, optimize=-1):
        # Byte-compile *file*; report failure instead of raising.
        import py_compile
        if self.debug:
            print("Compiling", file)
        try:
            py_compile.compile(file, doraise=True, optimize=optimize)
        except py_compile.PyCompileError as err:
            print(err.msg)
            return False
        return True

    source = pathname + ".py"
    legacy_pyc = pathname + ".pyc"
    # PEP 3147 cache paths, keyed by optimization level.
    cached = {
        level: importlib.util.cache_from_source(source, optimization=tag)
        for level, tag in ((0, ''), (1, 1), (2, 2))
    }

    def _is_fresh(candidate):
        # True when *candidate* exists and is at least as new as the source.
        return (os.path.isfile(candidate) and
                os.stat(candidate).st_mtime >= os.stat(source).st_mtime)

    level = self._optimize
    if level == -1:
        # Legacy mode: take the first up-to-date compiled file, trying the
        # sibling .pyc before the __pycache__ variants.  Whatever is found
        # is stored under the legacy .pyc name.
        for candidate in (legacy_pyc, cached[0], cached[1], cached[2]):
            if _is_fresh(candidate):
                fname = candidate
                arcname = legacy_pyc
                break
        else:
            # Nothing usable on disk: compile into a PEP 3147 pyc file,
            # which lands at the interpreter's own optimization level.
            if _compile(source):
                fname = cached.get(sys.flags.optimize, cached[2])
                arcname = legacy_pyc
            else:
                fname = arcname = source
    else:
        # New mode: use exactly the requested optimization level.
        arcname = legacy_pyc
        for known_level, cache_path in cached.items():
            if level == known_level:
                fname = cache_path
                break
        else:
            msg = "invalid value for 'optimize': {!r}".format(self._optimize)
            raise ValueError(msg)
        if not _is_fresh(fname):
            if not _compile(source, optimize=level):
                fname = arcname = source

    archivename = os.path.basename(arcname)
    if basename:
        archivename = "%s/%s" % (basename, archivename)
    return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001989
1990
def main(args=None):
    """Run the zipfile command-line interface.

    *args* is the list of command-line arguments to parse; when None,
    argparse falls back to sys.argv[1:].  Exactly one of the following
    options must be given:

      -l <zipfile>                  list the archive's contents
      -e <zipfile> <output_dir>     extract the archive into a directory
      -c <name> [<file> ...]        create an archive from files/directories
      -t <zipfile>                  test the archive for corrupted members
    """
    import argparse

    description = 'A simple command-line interface for zipfile module.'
    parser = argparse.ArgumentParser(description=description)
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    group.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    group.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    args = parser.parse_args(args)

    if args.test is not None:
        src = args.test
        with ZipFile(src, 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args.list is not None:
        src = args.list
        with ZipFile(src, 'r') as zf:
            zf.printdir()

    elif args.extract is not None:
        src, curdir = args.extract
        with ZipFile(src, 'r') as zf:
            zf.extractall(curdir)

    elif args.create is not None:
        # nargs='+' guarantees at least the archive name is present.
        # Unpack instead of pop(0) so the parsed namespace is not mutated.
        zip_name, *files = args.create

        def add_to_zip(zf, path, zippath):
            # Recursively add *path* to *zf* under the archive name *zippath*.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                if zippath:
                    # Store an explicit entry for the directory itself.
                    zf.write(path, zippath)
                # os.listdir() returns entries in arbitrary order; sort them
                # so the member order of the archive is reproducible.
                for nm in sorted(os.listdir(path)):
                    add_to_zip(zf,
                               os.path.join(path, nm),
                               os.path.join(zippath, nm))
            # else: ignore anything that is neither a file nor a directory

        with ZipFile(zip_name, 'w') as zf:
            for path in files:
                zippath = os.path.basename(path)
                if not zippath:
                    # Trailing separator ("dir/"): use the directory's name.
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                add_to_zip(zf, path, zippath)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002050
# Run the command-line interface when this module is executed as a script
# (main() parses sys.argv because no argument list is passed).
if __name__ == "__main__":
    main()