blob: 37ce3281e0928ca9977a64139b347e09ac612f63 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Brett Cannonb57a0852013-06-15 17:32:30 -04008import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import sys
10import time
11import stat
12import shutil
13import struct
14import binascii
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020015import threading
Guido van Rossum32abe6f2000-03-31 17:30:02 +000016
17try:
Tim Peterse1190062001-01-15 03:34:38 +000018 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000019 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040020except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000021 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000022 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000023
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020024try:
25 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040026except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020027 bz2 = None
28
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020029try:
30 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040031except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020032 lzma = None
33
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020034__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020035 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Georg Brandl4d540882010-10-28 06:42:33 +000036 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000037
class BadZipFile(Exception):
    """Raised for ZIP data this module cannot interpret.

    Within this module it signals, for example, a corrupt extra field or
    an archive that spans multiple disks.
    """
Thomas Wouters0e3f5912006-08-11 14:57:12 +000040
41
class LargeZipFile(Exception):
    """Raised while writing when ZIP64 extensions are needed but disabled.

    An archive member (or the archive itself) is too large for the classic
    ZIP structures and the caller did not allow the ZIP64 extensions.
    """
47
Georg Brandl4d540882010-10-28 06:42:33 +000048error = BadZipfile = BadZipFile # Pre-3.2 compatibility names
49
Guido van Rossum32abe6f2000-03-31 17:30:02 +000050
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +000051ZIP64_LIMIT = (1 << 31) - 1
Serhiy Storchakacfbb3942014-09-23 21:34:24 +030052ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +000053ZIP_MAX_COMMENT = (1 << 16) - 1
Thomas Wouters0e3f5912006-08-11 14:57:12 +000054
Guido van Rossum32abe6f2000-03-31 17:30:02 +000055# constants for Zip file compression methods
56ZIP_STORED = 0
57ZIP_DEFLATED = 8
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020058ZIP_BZIP2 = 12
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020059ZIP_LZMA = 14
Guido van Rossum32abe6f2000-03-31 17:30:02 +000060# Other ZIP compression methods not supported
61
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020062DEFAULT_VERSION = 20
63ZIP64_VERSION = 45
64BZIP2_VERSION = 46
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020065LZMA_VERSION = 63
Martin v. Löwisd099b562012-05-01 14:08:22 +020066# we recognize (but not necessarily support) all features up to that version
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020067MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020068
Martin v. Löwisb09b8442008-07-03 14:13:42 +000069# Below are some formats and associated data for reading/writing headers using
70# the struct module. The names and structures of headers/records are those used
71# in the PKWARE description of the ZIP file format:
72# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
73# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000074
Martin v. Löwisb09b8442008-07-03 14:13:42 +000075# The "end of central directory" structure, magic number, size, and indices
76# (section V.I in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +000077structEndArchive = b"<4s4H2LH"
78stringEndArchive = b"PK\005\006"
79sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +000080
81_ECD_SIGNATURE = 0
82_ECD_DISK_NUMBER = 1
83_ECD_DISK_START = 2
84_ECD_ENTRIES_THIS_DISK = 3
85_ECD_ENTRIES_TOTAL = 4
86_ECD_SIZE = 5
87_ECD_OFFSET = 6
88_ECD_COMMENT_SIZE = 7
89# These last two indices are not part of the structure as defined in the
90# spec, but they are used internally by this module as a convenience
91_ECD_COMMENT = 8
92_ECD_LOCATION = 9
93
94# The "central directory" structure, magic number, size, and indices
95# of entries in the structure (section V.F in the format document)
96structCentralDir = "<4s4B4HL2L5H2L"
Georg Brandl2ee470f2008-07-16 12:55:28 +000097stringCentralDir = b"PK\001\002"
Martin v. Löwisb09b8442008-07-03 14:13:42 +000098sizeCentralDir = struct.calcsize(structCentralDir)
99
Fred Drake3e038e52001-02-28 17:56:26 +0000100# indexes of entries in the central directory structure
101_CD_SIGNATURE = 0
102_CD_CREATE_VERSION = 1
103_CD_CREATE_SYSTEM = 2
104_CD_EXTRACT_VERSION = 3
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000105_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +0000106_CD_FLAG_BITS = 5
107_CD_COMPRESS_TYPE = 6
108_CD_TIME = 7
109_CD_DATE = 8
110_CD_CRC = 9
111_CD_COMPRESSED_SIZE = 10
112_CD_UNCOMPRESSED_SIZE = 11
113_CD_FILENAME_LENGTH = 12
114_CD_EXTRA_FIELD_LENGTH = 13
115_CD_COMMENT_LENGTH = 14
116_CD_DISK_NUMBER_START = 15
117_CD_INTERNAL_FILE_ATTRIBUTES = 16
118_CD_EXTERNAL_FILE_ATTRIBUTES = 17
119_CD_LOCAL_HEADER_OFFSET = 18
120
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000121# The "local file header" structure, magic number, size, and indices
122# (section V.A in the format document)
123structFileHeader = "<4s2B4HL2L2H"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000124stringFileHeader = b"PK\003\004"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000125sizeFileHeader = struct.calcsize(structFileHeader)
126
Fred Drake3e038e52001-02-28 17:56:26 +0000127_FH_SIGNATURE = 0
128_FH_EXTRACT_VERSION = 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000129_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000130_FH_GENERAL_PURPOSE_FLAG_BITS = 3
131_FH_COMPRESSION_METHOD = 4
132_FH_LAST_MOD_TIME = 5
133_FH_LAST_MOD_DATE = 6
134_FH_CRC = 7
135_FH_COMPRESSED_SIZE = 8
136_FH_UNCOMPRESSED_SIZE = 9
137_FH_FILENAME_LENGTH = 10
138_FH_EXTRA_FIELD_LENGTH = 11
139
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000140# The "Zip64 end of central directory locator" structure, magic number, and size
Georg Brandl2ee470f2008-07-16 12:55:28 +0000141structEndArchive64Locator = "<4sLQL"
142stringEndArchive64Locator = b"PK\x06\x07"
143sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000144
145# The "Zip64 end of central directory" record, magic number, size, and indices
146# (section V.G in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000147structEndArchive64 = "<4sQ2H2L4Q"
148stringEndArchive64 = b"PK\x06\x06"
149sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000150
151_CD64_SIGNATURE = 0
152_CD64_DIRECTORY_RECSIZE = 1
153_CD64_CREATE_VERSION = 2
154_CD64_EXTRACT_VERSION = 3
155_CD64_DISK_NUMBER = 4
156_CD64_DISK_NUMBER_START = 5
157_CD64_NUMBER_ENTRIES_THIS_DISK = 6
158_CD64_NUMBER_ENTRIES_TOTAL = 7
159_CD64_DIRECTORY_SIZE = 8
160_CD64_OFFSET_START_CENTDIR = 9
161
def _check_zipfile(fp):
    """Return True if an "end of central directory" record is found in fp.

    fp must be an open binary stream.  An OSError raised while probing the
    stream is treated the same as "no record found".
    """
    try:
        found = bool(_EndRecData(fp))   # truthy => correct magic number
    except OSError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000169
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.  When a
    file-like object is given, its current position is saved before the
    check and restored afterwards, so the caller's stream is not left at
    an arbitrary offset.

    Returns True when an end-of-central-directory record is found and
    False otherwise, including when the file cannot be opened, read or
    seeked (OSError).
    """
    result = False
    try:
        if hasattr(filename, "read"):
            # The probe seeks around in the stream; put the position back
            # where the caller had it.
            pos = filename.tell()
            result = _check_zipfile(fp=filename)
            filename.seek(pos)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except OSError:
        pass
    return result
185
def _EndRecData64(fpin, offset, endrec):
    """Merge ZIP64 end-of-archive information into endrec, if present.

    offset is the (negative) position of the classic end-of-central-directory
    record relative to the end of the file.  The ZIP64 locator is expected
    directly before it and the ZIP64 record directly before the locator.
    endrec is updated in place and returned; it is returned unchanged when
    no valid ZIP64 structures are found.

    Raises BadZipFile for archives that span multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # The file is too small to hold a ZIP64 locator, so there is
        # nothing to merge.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator,
                                               locator)
    if sig != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks == 1):
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data' sits between record and locator.
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    fields = struct.unpack(structEndArchive64, record)
    if fields[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Overwrite the classic values with the 64-bit ones.
    endrec[_ECD_SIGNATURE] = fields[_CD64_SIGNATURE]
    endrec[_ECD_DISK_NUMBER] = fields[_CD64_DISK_NUMBER]
    endrec[_ECD_DISK_START] = fields[_CD64_DISK_NUMBER_START]
    endrec[_ECD_ENTRIES_THIS_DISK] = fields[_CD64_NUMBER_ENTRIES_THIS_DISK]
    endrec[_ECD_ENTRIES_TOTAL] = fields[_CD64_NUMBER_ENTRIES_TOTAL]
    endrec[_ECD_SIZE] = fields[_CD64_DIRECTORY_SIZE]
    endrec[_ECD_OFFSET] = fields[_CD64_OFFSET_START_CENTDIR]
    return endrec
227
228
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list holding the unpacked fields of the ZIP "End of
    central dir" record, followed by the archive comment and finally the
    file offset at which the record starts.  ZIP64 information, when
    present, is merged in by _EndRecData64().
    """
    # Determine file size.
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: with no archive comment the record is the very last thing
    # in the file.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    if (len(tail) == sizeEndCentDir
            and tail[0:4] == stringEndArchive
            and tail[-2:] == b"\000\000"):
        # Signature matches and the comment length is zero.
        endrec = list(struct.unpack(structEndArchive, tail))
        endrec.append(b"")                          # blank comment
        endrec.append(filesize - sizeEndCentDir)    # record start offset
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Slow path: either this is not a ZIP file, or a comment follows the
    # record.  The comment may be up to 64K long, so scan that much of the
    # file's tail for the last occurrence of the signature.  It is assumed
    # that the signature does not appear inside the comment itself.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    window = fpin.read()
    start = window.rfind(stringEndArchive)
    if start < 0:
        # Unable to find a valid end of central directory structure.
        return None

    rec_end = start + sizeEndCentDir
    recData = window[start:rec_end]
    if len(recData) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, recData))
    commentSize = endrec[_ECD_COMMENT_SIZE]     # as claimed by the zip file
    endrec.append(window[rec_end:rec_end + commentSize])
    endrec.append(maxCommentStart + start)

    # Try to read the "Zip64 end of central directory" structure.
    return _EndRecData64(fpin, maxCommentStart + start - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000288
Fred Drake484d7352000-10-02 21:14:52 +0000289
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename is the member name; it is cut at the first null character
        and os.sep is replaced by "/".  date_time is a
        (year, month, day, hour, min, sec) sequence.

        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Return a compact description showing only the non-default fields."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The upper 16 bits are shown as a file mode, the lower 16 bits as
        # a raw hexadecimal value.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        zip64 selects whether a ZIP64 extra record is appended.  With the
        default None it is enabled exactly when one of the sizes exceeds
        ZIP64_LIMIT.

        As a side effect extract_version and create_version are raised to
        the minimum required by ZIP64 and by the compression method.

        Raises LargeZipFile if a size exceeds ZIP64_LIMIT while zip64 is
        false.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4,
                                        file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        The name is encoded as ASCII when possible; otherwise it is encoded
        as UTF-8 and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 records found in self.extra to this object.

        The extra field is walked as a sequence of (type, length, payload)
        records.  Records of type 0x0001 supply 64-bit replacements for
        file_size, compress_size and header_offset, in that order, for each
        of those attributes that currently holds a sentinel value.

        Raises BadZipFile if a record is truncated, has an unexpected
        length, or carries fewer values than the sentinels require.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        self.file_size = counts[idx]
                        idx += 1

                    if self.compress_size == 0xFFFFFFFF:
                        self.compress_size = counts[idx]
                        idx += 1

                    if self.header_offset == 0xffffffff:
                        self.header_offset = counts[idx]
                        idx += 1
                except IndexError:
                    # The record is shorter than the sentinels in the
                    # header demand: report it as a damaged archive rather
                    # than leaking a bare IndexError.
                    raise BadZipFile("Corrupt extra field %04x (size=%d)"
                                     % (tp, ln)) from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory.

        A member counts as a directory when its name ends with "/".  An
        empty name is not a directory.
        """
        # endswith() copes with an empty filename, where indexing [-1]
        # would raise IndexError.
        return self.filename.endswith('/')
506
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000507
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300508# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
509# internal keys. We noticed that a direct implementation is faster than
510# relying on binascii.crc32().
Thomas Wouterscf297e42007-02-23 15:07:44 +0000511
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300512_crctable = None
513def _gen_crc(crc):
514 for j in range(8):
515 if crc & 1:
516 crc = (crc >> 1) ^ 0xEDB88320
517 else:
518 crc >>= 1
519 return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000520
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300521# ZIP supports a password-based form of encryption. Even though known
522# plaintext attacks have been found against it, it is still useful
523# to be able to get data out of such a file.
524#
525# Usage:
526# zd = _ZipDecrypter(mypwd)
527# plain_bytes = zd(cypher_bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000528
def _ZipDecrypter(pwd):
    """Return a function that decrypts bytes using the password pwd.

    pwd is iterated as integers (e.g. a bytes object) to initialise the
    three internal keys.  The returned callable takes a bytes-like object
    and returns the decrypted bytes; the key state advances with every
    byte, so successive chunks must be passed in order.
    """
    global _crctable
    if _crctable is None:
        # Built lazily on first use and shared by all decrypters.
        _crctable = [_gen_crc(i) for i in range(256)]
    table = _crctable

    key0, key1, key2 = 305419896, 591751049, 878082192

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ table[(crc ^ ch) & 0xFF]

    def update_keys(c):
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        # Add the low byte of key0, then multiply-and-increment, all
        # modulo 2**32.
        key1 = ((key1 + (key0 & 0xFF)) * 134775813 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    for ch in pwd:
        update_keys(ch)

    def decrypter(data):
        """Decrypt a bytes object."""
        out = bytearray()
        for byte in data:
            k = key2 | 2
            byte ^= ((k * (k ^ 1)) >> 8) & 0xFF
            # The keys are advanced with the decrypted byte.
            update_keys(byte)
            out.append(byte)
        return bytes(out)

    return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000565
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200566
class LZMACompressor:
    """Raw LZMA1 compressor that prefixes its output with a small header.

    The first output produced (by compress() or flush()) starts with the
    bytes 9 and 4, a little-endian 16-bit property length, and the encoded
    filter properties; the raw compressed stream follows.
    """

    def __init__(self):
        # The underlying compressor is created lazily so that the header
        # is emitted together with the first piece of output.
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress data, returning whatever output is available."""
        header = b'' if self._comp is not None else self._init()
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream and return the remaining output."""
        header = b'' if self._comp is not None else self._init()
        return header + self._comp.flush()
588
589
class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by a properties header.

    Input is buffered until the 4-byte header, the filter properties it
    announces, and at least one further byte have arrived; only then is
    the underlying decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Feed data and return the decompressed bytes available so far."""
        if self._decomp is None:
            self._unconsumed += data
            buffered = self._unconsumed
            if len(buffered) <= 4:
                return b''
            # Bytes 2-3 hold the little-endian length of the properties.
            psize, = struct.unpack('<H', buffered[2:4])
            body_start = 4 + psize
            if len(buffered) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, buffered[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = buffered[body_start:]
            # The buffer is no longer needed once the header is parsed.
            del self._unconsumed

        output = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return output
616
617
618compressor_names = {
619 0: 'store',
620 1: 'shrink',
621 2: 'reduce',
622 3: 'reduce',
623 4: 'reduce',
624 5: 'reduce',
625 6: 'implode',
626 7: 'tokenize',
627 8: 'deflate',
628 9: 'deflate64',
629 10: 'implode',
630 12: 'bzip2',
631 14: 'lzma',
632 18: 'terse',
633 19: 'lz77',
634 97: 'wavpack',
635 98: 'ppmd',
636}
637
def _check_compression(compression):
    """Validate that the given compression method can be used.

    Returns None on success.  Raises RuntimeError when the method is known
    but its supporting module could not be imported, and
    NotImplementedError for any other method.
    """
    if compression == ZIP_STORED:
        return  # storing needs no supporting module

    if compression == ZIP_DEFLATED:
        module = zlib
        message = "Compression requires the (missing) zlib module"
    elif compression == ZIP_BZIP2:
        module = bz2
        message = "Compression requires the (missing) bz2 module"
    elif compression == ZIP_LZMA:
        module = lzma
        message = "Compression requires the (missing) lzma module"
    else:
        raise NotImplementedError("That compression method is not supported")

    if not module:
        raise RuntimeError(message)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200655
656
def _get_compressor(compress_type):
    """Return a new compressor object for compress_type.

    None is returned for any method other than deflate, bzip2 and LZMA
    (which includes ZIP_STORED).
    """
    if compress_type == ZIP_DEFLATED:
        # A negative window size is passed to zlib here; the stream is
        # written without a zlib header.
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    if compress_type == ZIP_LZMA:
        return LZMACompressor()
    return None
667
668
def _get_decompressor(compress_type):
    """Return a new decompressor object for compress_type.

    None is returned for ZIP_STORED.  Raises NotImplementedError for a
    method this module cannot decompress; the message includes the
    method's name when it is listed in compressor_names.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    descr = compressor_names.get(compress_type)
    if not descr:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200684
685
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200686class _SharedFile:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300687 def __init__(self, file, pos, close, lock, writing):
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200688 self._file = file
689 self._pos = pos
690 self._close = close
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200691 self._lock = lock
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300692 self._writing = writing
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200693
694 def read(self, n=-1):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200695 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300696 if self._writing():
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300697 raise ValueError("Can't read from the ZIP file while there "
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300698 "is an open writing handle on it. "
699 "Close the writing handle before trying to read.")
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200700 self._file.seek(self._pos)
701 data = self._file.read(n)
702 self._pos = self._file.tell()
703 return data
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200704
705 def close(self):
706 if self._file is not None:
707 fileobj = self._file
708 self._file = None
709 self._close(fileobj)
710
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200711# Provide the tell method for unseekable stream
712class _Tellable:
713 def __init__(self, fp):
714 self.fp = fp
715 self.offset = 0
716
717 def write(self, data):
718 n = self.fp.write(data)
719 self.offset += n
720 return n
721
722 def tell(self):
723 return self.offset
724
725 def flush(self):
726 self.fp.flush()
727
728 def close(self):
729 self.fp.close()
730
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200731
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.

    Is returned by ZipFile.open().  Data is read from the underlying file
    object, optionally decrypted, decompressed according to the member's
    compression type, and checked against the member's CRC-32 once the end
    of the member has been reached.
    """

    # Max size supported by decompressor.  The parentheses matter:
    # "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Prepare to read the member described by *zipinfo* from *fileobj*.

        decrypter, when given, is called on every raw chunk read.
        close_fileobj tells close() whether to close *fileobj* as well.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        # Compressed bytes not yet read from the file.
        self._compress_left = zipinfo.compress_size
        # Uncompressed bytes not yet handed to the caller.
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Push the bytes just read back in front of the buffer.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        # Drain the buffer, then keep reading until n bytes or EOF.
        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than asked for: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n=-1):
        """Read up to n bytes with at most one read() system call.

        If n is omitted, None, or negative, return whatever is buffered
        plus the result of the first underlying read that yields data.
        """

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            # Loop only to skip reads that produce no decompressed output.
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never return more than the member's recorded uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read (and decrypt) raw bytes of the member from the file object.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            # The file ended before the member's compressed data did.
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000959
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000960
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300961class _ZipWriteFile(io.BufferedIOBase):
962 def __init__(self, zf, zinfo, zip64):
963 self._zinfo = zinfo
964 self._zip64 = zip64
965 self._zipfile = zf
966 self._compressor = _get_compressor(zinfo.compress_type)
967 self._file_size = 0
968 self._compress_size = 0
969 self._crc = 0
970
971 @property
972 def _fileobj(self):
973 return self._zipfile.fp
974
975 def writable(self):
976 return True
977
978 def write(self, data):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +0300979 if self.closed:
980 raise ValueError('I/O operation on closed file.')
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300981 nbytes = len(data)
982 self._file_size += nbytes
983 self._crc = crc32(data, self._crc)
984 if self._compressor:
985 data = self._compressor.compress(data)
986 self._compress_size += len(data)
987 self._fileobj.write(data)
988 return nbytes
989
990 def close(self):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +0300991 if self.closed:
992 return
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300993 super().close()
994 # Flush any data from the compressor, and update header info
995 if self._compressor:
996 buf = self._compressor.flush()
997 self._compress_size += len(buf)
998 self._fileobj.write(buf)
999 self._zinfo.compress_size = self._compress_size
1000 else:
1001 self._zinfo.compress_size = self._file_size
1002 self._zinfo.CRC = self._crc
1003 self._zinfo.file_size = self._file_size
1004
1005 # Write updated header info
1006 if self._zinfo.flag_bits & 0x08:
1007 # Write CRC and file sizes after the file data
1008 fmt = '<LQQ' if self._zip64 else '<LLL'
1009 self._fileobj.write(struct.pack(fmt, self._zinfo.CRC,
1010 self._zinfo.compress_size, self._zinfo.file_size))
1011 self._zipfile.start_dir = self._fileobj.tell()
1012 else:
1013 if not self._zip64:
1014 if self._file_size > ZIP64_LIMIT:
1015 raise RuntimeError('File size unexpectedly exceeded ZIP64 '
1016 'limit')
1017 if self._compress_size > ZIP64_LIMIT:
1018 raise RuntimeError('Compressed size unexpectedly exceeded '
1019 'ZIP64 limit')
1020 # Seek backwards and write file header (which will now include
1021 # correct CRC and file sizes)
1022
1023 # Preserve current position in file
1024 self._zipfile.start_dir = self._fileobj.tell()
1025 self._fileobj.seek(self._zinfo.header_offset)
1026 self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1027 self._fileobj.seek(self._zipfile.start_dir)
1028
1029 self._zipfile._writing = False
1030
1031 # Successfully written: Add file to our caches
1032 self._zipfile.filelist.append(self._zinfo)
1033 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1034
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001035class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001036 """ Class with methods to open, read, write, close, list zip files.
1037
Serhiy Storchaka235c5e02013-11-23 15:55:38 +02001038 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +00001039
Fred Drake3d9091e2001-03-26 15:49:24 +00001040 file: Either the path to the file, or a file-like object.
1041 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001042 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1043 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001044 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1045 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001046 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1047 needed, otherwise it will raise an exception when this would
1048 be necessary.
1049
Fred Drake3d9091e2001-03-26 15:49:24 +00001050 """
Fred Drake484d7352000-10-02 21:14:52 +00001051
Fred Drake90eac282001-02-28 05:29:34 +00001052 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001053 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001054
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
    """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
    or append 'a'.

    file may be a str path, an os.PathLike object, or a file-like object.
    Raises ValueError for any other mode; _check_compression() validates
    *compression*.  If setting up the archive fails, the file object is
    released through _fpclose() and the exception is re-raised.
    """
    if mode not in ('r', 'w', 'x', 'a'):
        raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = mode
    self.pwd = None
    self._comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, os.PathLike):
        file = os.fspath(file)
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        # Maps a ZipFile mode to the first io.open() mode to try, and each
        # io.open() mode to the fallback tried after an OSError.
        open_modes = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                      'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
        filemode = open_modes[mode]
        while True:
            try:
                self.fp = io.open(file, filemode)
                break
            except OSError:
                if filemode not in open_modes:
                    # No fallback left for this mode.
                    raise
                filemode = open_modes[filemode]
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    self._fileRefCnt = 1
    self._lock = threading.RLock()
    self._seekable = True
    self._writing = False

    try:
        if mode == 'r':
            self._RealGetContents()
        elif mode in ('w', 'x'):
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
            try:
                self.start_dir = self.fp.tell()
            except (AttributeError, OSError):
                # No usable tell(): count written bytes ourselves.
                self.fp = _Tellable(self.fp)
                self.start_dir = 0
                self._seekable = False
            else:
                # Some file-like objects can provide tell() but not seek()
                try:
                    self.fp.seek(self.start_dir)
                except (AttributeError, OSError):
                    self._seekable = False
        elif mode == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                self.start_dir = self.fp.tell()
        else:
            raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
    except BaseException:
        # Detach the file object before releasing it, then re-raise.
        fp = self.fp
        self.fp = None
        self._fpclose(fp)
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001141
Ezio Melottifaa6b7f2009-12-30 12:34:59 +00001142 def __enter__(self):
1143 return self
1144
1145 def __exit__(self, type, value, traceback):
1146 self.close()
1147
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001148 def __repr__(self):
1149 result = ['<%s.%s' % (self.__class__.__module__,
1150 self.__class__.__qualname__)]
1151 if self.fp is not None:
1152 if self._filePassed:
1153 result.append(' file=%r' % self.fp)
1154 elif self.filename is not None:
1155 result.append(' filename=%r' % self.filename)
1156 result.append(' mode=%r' % self.mode)
1157 else:
1158 result.append(' [closed]')
1159 result.append('>')
1160 return ''.join(result)
1161
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record, sets self.start_dir and
    self._comment, and appends one ZipInfo per central directory entry to
    self.filelist / self.NameToInfo.

    Raises BadZipFile when the end record cannot be found or an entry is
    truncated or has a bad signature, and NotImplementedError when an entry
    needs a newer ZIP version than MAX_EXTRACT_VERSION.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    # Parse the directory from an in-memory copy.
    fp = io.BytesIO(fp.read(size_cd))
    total = 0
    while total < size_cd:
        raw = fp.read(sizeCentralDir)
        if len(raw) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, raw)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        # 0x800 marks the UTF-8 file names extension; otherwise use the
        # historical ZIP filename encoding.
        filename = filename.decode('utf-8' if flags & 0x800 else 'cp437')
        # Create ZipInfo instance to store file information
        zinfo = ZipInfo(filename)
        zinfo.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        zinfo.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (zinfo.create_version, zinfo.create_system, zinfo.extract_version,
         zinfo.reserved, zinfo.flag_bits, zinfo.compress_type,
         dos_time, dos_date,
         zinfo.CRC, zinfo.compress_size, zinfo.file_size) = centdir[1:12]
        if zinfo.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (zinfo.extract_version / 10))
        zinfo.volume, zinfo.internal_attr, zinfo.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        zinfo._raw_time = dos_time
        zinfo.date_time = ((dos_date >> 9) + 1980,
                           (dos_date >> 5) & 0xF,
                           dos_date & 0x1F,
                           dos_time >> 11,
                           (dos_time >> 5) & 0x3F,
                           (dos_time & 0x1F) * 2)

        zinfo._decodeExtra()
        # Shift by the size of any data the archive was appended to.
        zinfo.header_offset = zinfo.header_offset + concat
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

        # update total bytes read from central directory
        total += (sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                  + centdir[_CD_EXTRA_FIELD_LENGTH]
                  + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001238
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001239
def namelist(self):
    """Return a new list with the filename of every entry in filelist."""
    return [zinfo.filename for zinfo in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001243
def infolist(self):
    """Return the list of ZipInfo instances for the archive's members.

    This is the archive's own filelist object, not a copy.
    """
    return self.filelist
1248
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header row is followed by one row per member, showing its name,
    modification time and uncompressed size.  Output goes to *file*, which
    is passed straight through to print().
    """
    # NOTE(review): the padding inside "Modified " may have been collapsed
    # when this source was extracted -- confirm against the upstream file.
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001257
def testzip(self):
    """Read all the files and check the CRC.

    Returns the name of the first member whose read raises BadZipFile, or
    None when every member reads cleanly.
    """
    chunk_size = 1 << 20
    for zinfo in self.filelist:
        try:
            # Read by chunks, to avoid an OverflowError or a
            # MemoryError with very large embedded files.
            with self.open(zinfo.filename, "r") as member:
                # Reading to the end is what triggers the CRC-32 check.
                while member.read(chunk_size):
                    pass
        except BadZipFile:
            return zinfo.filename
    return None
1270
def getinfo(self, name):
    """Return the ZipInfo instance registered under *name*.

    Raises KeyError when the archive has no member with that name.
    """
    info = self.NameToInfo.get(name)
    if info is not None:
        return info
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001279
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A false value (None or b'') clears the default.  Raises TypeError when
    a true value that is not bytes is given.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001288
@property
def comment(self):
    """The comment text associated with the ZIP file (stored in _comment)."""
    return self._comment
1293
@comment.setter
def comment(self, comment):
    # Store a new archive comment and mark the archive as modified.
    # Raises TypeError unless the comment is bytes; a comment longer than
    # ZIP_MAX_COMMENT is truncated after a warning.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
    # check for valid comment length
    limit = ZIP_MAX_COMMENT
    if len(comment) > limit:
        import warnings
        # stacklevel=2 attributes the warning to the code doing the
        # assignment rather than to this setter.
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % limit, stacklevel=2)
        comment = comment[:limit]
    self._comment = comment
    self._didModify = True
1306
def read(self, name, pwd=None):
    """Return the complete contents of member *name* as bytes.

    name and pwd are passed on to open(), which is called in "r" mode.
    """
    with self.open(name, "r", pwd) as member:
        payload = member.read()
    return payload
Guido van Rossumd8faa362007-04-27 19:54:29 +00001311
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Return file-like object for 'name'.

    name is a string for the file name within the ZIP file, or a ZipInfo
    object.

    mode should be 'r' to read a file already in the ZIP file, or 'w' to
    write to a file newly added to the archive.

    pwd is the password to decrypt files (only used for reading).

    When writing, if the file size is not known in advance but may exceed
    2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
    files.  If the size is known in advance, it is best to pass a ZipInfo
    instance for name, with zinfo.file_size set.

    Raises ValueError for an invalid mode, for a password given in write
    mode, when the archive is already closed, or when reading while a
    write handle is open; TypeError when pwd is not bytes; BadZipFile when
    the local file header (or the encryption header) is truncated or
    inconsistent with the central directory; NotImplementedError for
    patched data or strong encryption; RuntimeError when a password is
    missing or wrong.
    """
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    if pwd and (mode == "w"):
        raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    # Make sure we have an info object
    if isinstance(name, ZipInfo):
        # 'name' is already an info object
        zinfo = name
    elif mode == 'w':
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
    else:
        # Get info object for name
        zinfo = self.getinfo(name)

    if mode == 'w':
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                         "is an open writing handle on it. "
                         "Close the writing handle before trying to read.")

    # Open for reading: every reader holds one reference on the shared
    # underlying file; it is released through self._fpclose.
    self._fileRefCnt += 1
    zef_file = _SharedFile(self.fp, zinfo.header_offset,
                           self._fpclose, self._lock, lambda: self._writing)
    try:
        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, fheader)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            if len(header) != 12:
                # A short read would otherwise surface as a bare IndexError
                # on h[11] below; report it like any other truncation.
                raise BadZipFile("Truncated encryption header")
            h = zd(header)
            if zinfo.flag_bits & 0x8:
                # compare against the file time from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file %r" % name)

        return ZipExtFile(zef_file, mode, zinfo, zd, True)
    except BaseException:
        # Release the shared-file reference on any failure (including
        # KeyboardInterrupt), then let the error propagate unchanged.
        zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001424
def _open_to_write(self, zinfo, force_zip64=False):
    """Write the local header for *zinfo* and return a writable handle.

    Raises ValueError if ZIP64 is forced on an archive opened with
    allowZip64 false, or if another write handle is still open.  The
    archive stays in the "writing" state once this returns.
    """
    if force_zip64 and not self._allowZip64:
        raise ValueError(
            "force_zip64 is True, but allowZip64 was False when opening "
            "the ZIP file."
        )
    if self._writing:
        raise ValueError("Can't write to the ZIP file while there is "
                         "another write handle open on it. "
                         "Close the first handle before opening another.")

    # Placeholders: sizes and CRC are overwritten with the real values
    # once the member data has been processed.
    if not hasattr(zinfo, 'file_size'):
        zinfo.file_size = 0
    zinfo.compress_size = 0
    zinfo.CRC = 0

    # Rebuild the general purpose flags from scratch.
    flags = 0x00
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        flags |= 0x02
    if not self._seekable:
        flags |= 0x08
    zinfo.flag_bits = flags

    if not zinfo.external_attr:
        zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

    # Compressed size can be larger than uncompressed size, hence the
    # safety margin on the declared size.
    zip64 = self._allowZip64 and (
        force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

    if self._seekable:
        self.fp.seek(self.start_dir)
    zinfo.header_offset = self.fp.tell()

    self._writecheck(zinfo)
    self._didModify = True

    header = zinfo.FileHeader(zip64)
    self.fp.write(header)

    self._writing = True
    return _ZipWriteFile(self, zinfo, zip64)
1467
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the path it was written to.

    `member' may be a filename or a ZipInfo object.  The member keeps its
    full archive name below `path'; when `path' is None the current
    working directory is used, otherwise `path' is converted with
    os.fspath().  `pwd' is passed through for encrypted members.
    """
    destination = os.getcwd() if path is None else os.fspath(path)
    return self._extract_member(member, destination, pwd)
1480
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members (by default all of them).

    `path' is the directory to extract into; None means the current
    working directory.  `members' is optional and must be a subset of the
    list returned by namelist().  `pwd' is passed through for encrypted
    members.
    """
    selected = self.namelist() if members is None else members
    destination = os.getcwd() if path is None else os.fspath(path)

    for entry in selected:
        self._extract_member(entry, destination, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001497
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001498 @classmethod
1499 def _sanitize_windows_name(cls, arcname, pathsep):
1500 """Replace bad characters and remove trailing dots from parts."""
1501 table = cls._windows_illegal_name_trans_table
1502 if not table:
1503 illegal = ':<>|"?*'
1504 table = str.maketrans(illegal, '_' * len(illegal))
1505 cls._windows_illegal_name_trans_table = table
1506 arcname = arcname.translate(table)
1507 # remove trailing dots
1508 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1509 # rejoin, removing empty parts.
1510 arcname = pathsep.join(x for x in arcname if x)
1511 return arcname
1512
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

    'member' may also be a member name, which is resolved with getinfo().
    The archive name is made relative and stripped of drive letters,
    empty, "." and ".." components before being joined to targetpath.
    Returns the normalised path that was created or written.
    """
    if not isinstance(member, ZipInfo):
        member = self.getinfo(member)

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    invalid_path_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                               if x not in invalid_path_parts)
    if os.path.sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, os.path.sep)

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        # exist_ok: another thread or process may create the directory
        # between the check above and this call.
        os.makedirs(upperdirs, exist_ok=True)

    if member.is_dir():
        if not os.path.isdir(targetpath):
            try:
                os.mkdir(targetpath)
            except FileExistsError:
                # Lost a creation race: fine as long as what now exists
                # really is a directory.
                if not os.path.isdir(targetpath):
                    raise
        return targetpath

    with self.open(member, pwd=pwd) as source, \
         open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1554
def _writecheck(self, zinfo):
    """Validate that *zinfo* can be written to this archive.

    Warns about a duplicate member name.  Raises ValueError when the
    archive is not open in a writing mode or is already closed, and
    LargeZipFile when ZIP64 would be needed but is not allowed.  The
    compression type is validated through _check_compression().
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ('w', 'x', 'a'):
        raise ValueError("write() requires mode 'w', 'x', or 'a'")
    if not self.fp:
        raise ValueError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)

    if self._allowZip64:
        # With ZIP64 permitted none of the classic limits apply.
        return

    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        reason = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        reason = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        reason = "Zipfile size"
    else:
        reason = None

    if reason:
        raise LargeZipFile(reason +
                           " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001577
def write(self, filename, arcname=None, compress_type=None):
    """Add the file or directory *filename* to the archive.

    The member is stored under *arcname* (derived from *filename* by
    ZipInfo.from_file when omitted).  For regular files *compress_type*
    overrides the archive's default compression.  Raises ValueError when
    the archive is closed or a write handle is still open.
    """
    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname)

    if not zinfo.is_dir():
        # Regular file: choose the compression, then stream the content
        # through a write handle.
        zinfo.compress_type = (compress_type if compress_type is not None
                               else self.compression)
        with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
            shutil.copyfileobj(src, dest, 1024*8)
        return

    # Directory entry: it carries no data, only a local header.
    zinfo.compress_size = 0
    zinfo.CRC = 0
    with self._lock:
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        # The central directory will now start after this header.
        self.start_dir = self.fp.tell()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001619
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write in-memory *data* into the archive as one member.

    'data' may be either a 'str' or a 'bytes' instance; a 'str' is encoded
    as UTF-8 first.  'zinfo_or_arcname' is either a ZipInfo instance or the
    name of the file in the archive; for a plain name a ZipInfo stamped
    with the current local time is created.  'compress_type', when given,
    overrides the compression recorded for the member.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")

    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        now = time.localtime(time.time())[:6]
        zinfo = ZipInfo(filename=zinfo_or_arcname, date_time=now)
        zinfo.compress_type = self.compression
        if zinfo.filename[-1] == '/':
            # drwxrwxr-x plus the MS-DOS directory flag
            zinfo.external_attr = (0o40775 << 16) | 0x10
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------

    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    zinfo.file_size = len(data)            # Uncompressed size
    with self._lock, self.open(zinfo, mode='w') as dest:
        dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001655
1656 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001657 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001658 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001659
def close(self):
    """Close the archive; in mode 'w', 'x' or 'a' write the ending
    records first if the archive was modified.

    Does nothing when the archive is already closed.  Raises ValueError
    while a writing handle is still open.
    """
    if self.fp is None:
        return

    if self._writing:
        raise ValueError("Can't close the ZIP file while there is "
                         "an open writing handle on it. "
                         "Close the writing handle before closing the zip.")

    try:
        must_finalize = self.mode in ('w', 'x', 'a') and self._didModify
        if must_finalize:  # write ending records
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        # Detach the file object before releasing it, even if writing the
        # ending records failed.
        fp, self.fp = self.fp, None
        self._fpclose(fp)
1681
def _write_end_record(self):
    """Write the central directory and the end-of-archive records.

    One central directory entry is written per member of self.filelist at
    the current file position, followed by the ZIP64 end records when a
    count, size or offset exceeds the classic limits, and finally the
    end-of-archive record and the archive comment.  Raises LargeZipFile if
    ZIP64 records are needed but allowZip64 is false.
    """
    for zinfo in self.filelist:         # write central directory
        stamp = zinfo.date_time
        dosdate = (stamp[0] - 1980) << 9 | stamp[1] << 5 | stamp[2]
        dostime = stamp[3] << 11 | stamp[4] << 5 | (stamp[5] // 2)

        # 64-bit values that do not fit the fixed-width fields are moved
        # to a ZIP64 extra field; the fixed field then holds 0xffffffff.
        zip64_values = []
        if (zinfo.file_size > ZIP64_LIMIT
                or zinfo.compress_size > ZIP64_LIMIT):
            zip64_values.append(zinfo.file_size)
            zip64_values.append(zinfo.compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
        else:
            file_size = zinfo.file_size
            compress_size = zinfo.compress_size

        if zinfo.header_offset > ZIP64_LIMIT:
            zip64_values.append(zinfo.header_offset)
            header_offset = 0xffffffff
        else:
            header_offset = zinfo.header_offset

        extra_data = zinfo.extra
        min_version = 0
        if zip64_values:
            # Prepend a ZIP64 field (header id 1) to the member's extras
            zip64_field = struct.pack(
                '<HH' + 'Q'*len(zip64_values),
                1, 8*len(zip64_values), *zip64_values)
            extra_data = zip64_field + extra_data

            min_version = ZIP64_VERSION

        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
        except DeprecationWarning:
            # Dump the values that were being packed, then re-raise.
            print((structCentralDir, stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset), file=sys.stderr)
            raise
        for chunk in (centdir, filename, extra_data, zinfo.comment):
            self.fp.write(chunk)

    directory_end = self.fp.tell()
    # Write end-of-zip-archive record
    entry_count = len(self.filelist)
    directory_size = directory_end - self.start_dir
    directory_offset = self.start_dir

    if entry_count > ZIP_FILECOUNT_LIMIT:
        requires_zip64 = "Files count"
    elif directory_offset > ZIP64_LIMIT:
        requires_zip64 = "Central directory offset"
    elif directory_size > ZIP64_LIMIT:
        requires_zip64 = "Central directory size"
    else:
        requires_zip64 = None

    if requires_zip64:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
        zip64endrec = struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, entry_count, entry_count,
            directory_size, directory_offset)
        self.fp.write(zip64endrec)

        zip64locrec = struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, directory_end, 1)
        self.fp.write(zip64locrec)
        # The classic record below only has room for clamped values.
        entry_count = min(entry_count, 0xFFFF)
        directory_size = min(directory_size, 0xFFFFFFFF)
        directory_offset = min(directory_offset, 0xFFFFFFFF)

    endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, entry_count, entry_count,
                         directory_size, directory_offset, len(self._comment))
    self.fp.write(endrec)
    self.fp.write(self._comment)
    self.fp.flush()
1782
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001783 def _fpclose(self, fp):
1784 assert self._fileRefCnt > 0
1785 self._fileRefCnt -= 1
1786 if not self._fileRefCnt and not self._filePassed:
1787 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001788
1789
class PyZipFile(ZipFile):
    """ZipFile variant for building archives of Python modules and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember the optimization level.

        optimize=-1 selects the legacy behaviour of _get_codename (use
        whichever compiled file is present); 0, 1 or 2 select that
        specific optimization level.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, the directory and all package
        subdirectories are searched recursively for *.py files and the
        modules are entered into the archive.  If pathname is a plain
        directory, its *.py files are entered as top level modules.
        Otherwise pathname must be a Python *.py file and that module is
        put into the archive.  Modules are compiled to module.pyc if
        necessary; the source is stored only when compilation fails.
        If filterfunc is given, it is called with each path before it is
        added; when it returns a false value the file or directory is
        skipped.
        """
        pathname = os.fspath(pathname)

        def add_module(py_path, arc_base, label):
            # Resolve the file to store for the module at py_path and
            # write it below arc_base.
            fname, arcname = self._get_codename(py_path[0:-3], arc_base)
            if self.debug:
                print(label, arcname)
            self.write(fname, arcname)

        def add_module_if_allowed(py_path, arc_base):
            # Same as add_module, but consult filterfunc first.
            if filterfunc and not filterfunc(py_path):
                if self.debug:
                    print('file %r skipped by filterfunc' % py_path)
                return
            add_module(py_path, arc_base, "Adding")

        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s %r skipped by filterfunc' % (label, pathname))
            return

        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            add_module(pathname, basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] == ".py":
                    add_module_if_allowed(os.path.join(pathname, filename),
                                          basename)
            return

        # This is a package directory, add it
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        add_module(initname, basename, "Adding")

        dirlist = os.listdir(pathname)
        dirlist.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename,
                                 filterfunc=filterfunc)  # Recursive call
            elif os.path.splitext(filename)[1] == ".py":
                add_module_if_allowed(path, basename)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).
        When compilation fails the .py source itself is returned.
        """
        def _compile(file, optimize=-1):
            # Byte-compile *file*; report a compile error and return False
            # instead of raising.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)

        def _is_current(compiled):
            # True when *compiled* exists and is not older than the source.
            return (os.path.isfile(compiled) and
                    os.stat(compiled).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present.  The legacy .pyc
            # is preferred, then the __pycache__ files by optimization
            # level; all are stored under the legacy pyc name.
            for candidate in (file_pyc, pycache_opt0,
                              pycache_opt1, pycache_opt2):
                if _is_current(candidate):
                    fname = candidate
                    arcname = file_pyc
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    fname = {0: pycache_opt0,
                             1: pycache_opt1}.get(sys.flags.optimize,
                                                  pycache_opt2)
                    arcname = file_pyc
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            for level, cached in ((0, pycache_opt0),
                                  (1, pycache_opt1),
                                  (2, pycache_opt2)):
                if self._optimize == level:
                    fname = cached
                    break
            else:
                msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                raise ValueError(msg)
            arcname = file_pyc
            if not _is_current(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py

        archivename = os.path.basename(arcname)
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001962
1963
def main(args=None):
    """Run the simple command-line interface of the zipfile module.

    *args* is the list of command-line argument strings to parse.  When it
    is None, argparse reads the arguments from sys.argv instead.

    Exactly one of the following options must be supplied:

    -l, --list <zipfile>
        Print a listing of the archive (ZipFile.printdir()).
    -e, --extract <zipfile> <output_dir>
        Extract every member of the archive into <output_dir>.
    -c, --create <name> [<file> ...]
        Create the archive <name> from the given files and directories.
        Directories are added recursively, in sorted order.
    -t, --test <zipfile>
        Run ZipFile.testzip() and report the member name it returns, if
        any, followed by "Done testing".
    """
    import argparse

    description = 'A simple command-line interface for zipfile module.'
    parser = argparse.ArgumentParser(description=description)
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    group.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    group.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    args = parser.parse_args(args)

    if args.test is not None:
        src = args.test
        with ZipFile(src, 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args.list is not None:
        src = args.list
        with ZipFile(src, 'r') as zf:
            zf.printdir()

    elif args.extract is not None:
        src, curdir = args.extract
        with ZipFile(src, 'r') as zf:
            zf.extractall(curdir)

    elif args.create is not None:
        # nargs='+' guarantees at least one value: the archive name.  The
        # remaining values (possibly none) are the sources to add.
        zip_name, *files = args.create

        def addToZip(zf, path, zippath):
            """Add *path* to *zf* as *zippath*, recursing into directories."""
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                # An empty zippath means the directory's contents go to the
                # archive root, so no entry is written for the directory.
                if zippath:
                    zf.write(path, zippath)
                # os.listdir() returns names in arbitrary order; sort them
                # so the member order of the archive is reproducible.
                for nm in sorted(os.listdir(path)):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore anything that is neither a file nor a directory

        with ZipFile(zip_name, 'w') as zf:
            for path in files:
                zippath = os.path.basename(path)
                if not zippath:
                    # A trailing separator ("dir/") leaves an empty
                    # basename; use the last real path component instead.
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                addToZip(zf, path, zippath)


# Run the command-line interface when the module is executed as a script.
if __name__ == "__main__":
    main()