blob: 8a19ca246b6038b5906ab3bdd6ec15637018bf60 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Brett Cannonb57a0852013-06-15 17:32:30 -04008import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import sys
10import time
11import stat
12import shutil
13import struct
14import binascii
15
Serhiy Storchaka9e777732015-10-10 19:43:32 +030016try:
17 import threading
18except ImportError:
19 import dummy_threading as threading
Guido van Rossum32abe6f2000-03-31 17:30:02 +000020
21try:
Tim Peterse1190062001-01-15 03:34:38 +000022 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000023 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040024except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000025 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000026 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000027
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020028try:
29 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040030except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020031 bz2 = None
32
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020033try:
34 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040035except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020036 lzma = None
37
# Names exported by a star-import of this module.  "BadZipfile" and "error"
# are older spellings of BadZipFile kept for compatibility.
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000041
class BadZipFile(Exception):
    """Raised by this module when an archive is corrupt or unsupported."""
    pass
Thomas Wouters0e3f5912006-08-11 14:57:12 +000044
45
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that requires ZIP64 extensions
    while those extensions are disabled.
    """
51
error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names


# Size threshold: ZipInfo.FileHeader uses (or demands) the ZIP64 extension
# for any file size or compressed size above this value.
ZIP64_LIMIT = (1 << 31) - 1
# Both limits below are the largest value an unsigned 16-bit field can hold.
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# Version numbers written into headers.  ZipInfo.FileHeader raises a member's
# extract/create version to the minimum required by the features it uses.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020072
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# Layout (little-endian): 4-byte signature, four unsigned 16-bit fields,
# two unsigned 32-bit fields, one unsigned 16-bit field.
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the unpacked fields in the list built by _EndRecData.
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9
97
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
# Layout (little-endian): 4-byte signature, four unsigned bytes, four
# unsigned 16-bit fields, three unsigned 32-bit fields, five unsigned 16-bit
# fields and two unsigned 32-bit fields -- nineteen values in total.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
124
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# Layout (little-endian): 4-byte signature, two unsigned bytes, four unsigned
# 16-bit fields, three unsigned 32-bit fields and two unsigned 16-bit fields.
# ZipInfo.FileHeader packs its header with this format.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the unpacked local file header fields.
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
143
# The "Zip64 end of central directory locator" structure, magic number, and size
# Unpacked by _EndRecData64 as (signature, disk number, relative offset,
# total number of disks).
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# Layout (little-endian): 4-byte signature, one unsigned 64-bit field, two
# unsigned 16-bit fields, two unsigned 32-bit fields, four unsigned 64-bit
# fields -- ten values in total.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the unpacked ZIP64 end-of-central-directory fields.
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
165
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An OSError raised while probing the file is treated as "not a ZIP
    file" and yields False.
    """
    try:
        # _EndRecData gives back a non-empty list on success, None otherwise.
        found = bool(_EndRecData(fp))
    except OSError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000173
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    *filename* is either a path, which is opened in binary mode, or an
    object with a ``read`` attribute, which is probed directly.  Any
    OSError raised along the way results in False.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and close it again afterwards.
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            # Already a file-like object; the caller keeps ownership of it.
            verdict = _check_zipfile(fp=filename)
    except OSError:
        pass
    return verdict
189
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin is a seekable binary file object.  offset is the position of the
    classic "end of central directory" record relative to the end of the
    file (it is passed to seek() with whence=2).  endrec is the list built
    by _EndRecData; it is modified in place and returned.

    If no well-formed ZIP64 locator and record precede the classic record,
    endrec is returned unchanged.  Raises BadZipFile when the locator
    describes an archive spanning several disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    # Some writers record 0 as the total number of disks for an ordinary
    # single-file archive, so only a count above one means "multi-disk".
    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
231
232
def _EndRecData(fpin):
    """Locate and parse the "End of Central Directory" record of fpin.

    Returns None when no such record can be found.  Otherwise the result is
    a list holding the unpacked fields of the record, followed by the
    archive comment and by the file offset at which the record starts (ten
    items in all), after _EndRecData64 has had the chance to overwrite
    fields from a ZIP64 record."""

    # Seek to the end to learn how large the file is.
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: an archive without a comment ends with the record itself,
    # so look at exactly the last sizeEndCentDir bytes.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    if (len(tail) == sizeEndCentDir
            and tail[0:4] == stringEndArchive
            and tail[-2:] == b"\000\000"):
        # Right signature and a zero comment length: unpack the record and
        # add an empty comment plus the offset at which the record starts.
        fields = [*struct.unpack(structEndArchive, tail),
                  b"", filesize - sizeEndCentDir]
        # Let a ZIP64 record, if there is one, override the fields.
        return _EndRecData64(fpin, -sizeEndCentDir, fields)

    # Slow path: either this is no ZIP file, or the record is followed by an
    # archive comment.  The comment is the last item in the file and may be
    # up to 64K long, so search that much of the tail for the signature.  The
    # signature is assumed not to occur inside the comment.
    search_from = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(search_from, 0)
    window = fpin.read()
    sig_at = window.rfind(stringEndArchive)
    if sig_at < 0:
        # Unable to find a valid end of central directory structure
        return None

    record_end = sig_at + sizeEndCentDir
    raw_record = window[sig_at:record_end]
    if len(raw_record) != sizeEndCentDir:
        # Zip file is corrupted.
        return None

    fields = list(struct.unpack(structEndArchive, raw_record))
    comment_len = fields[_ECD_COMMENT_SIZE]     # as claimed by the zip file
    fields.append(window[record_end:record_end + comment_len])
    fields.append(search_from + sig_at)

    # Let a ZIP64 record, if there is one, override the fields.
    return _EndRecData64(fpin, search_from + sig_at - filesize, fields)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000292
Fred Drake484d7352000-10-02 21:14:52 +0000293
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create a member description with default header values.

        filename is the member name; it is cut at the first null character
        and os.sep is replaced by "/".  date_time is a sequence of
        (year, month, day, hour, min, sec).

        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Return a short description listing only the non-default fields."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The upper 16 bits hold the file mode, the lower 16 the rest.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        zip64 selects whether a ZIP64 extra field is appended: True forces
        it, False forbids it, and None (the default) enables it only when a
        size exceeds ZIP64_LIMIT.

        Raises LargeZipFile if a size exceeds ZIP64_LIMIT while zip64 is
        false.  As a side effect extract_version and create_version are
        raised to the minimum the member requires.
        """
        dt = self.date_time
        # Pack the timestamp into the 16-bit date and time header fields;
        # seconds are stored halved.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            # Extra field with id 1; its length excludes the 4-byte id/size
            # prefix.
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        The name is encoded as ASCII when possible; otherwise it is encoded
        as UTF-8 and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 records found in self.extra to this object.

        The extra data is walked as a sequence of (type, length, payload)
        records.  For type 0x0001 the 64-bit values replace, in order,
        file_size, compress_size and header_offset, but only for those
        attributes currently holding the all-ones sentinel.

        Raises BadZipFile if a record is truncated, has an unexpected
        length, or carries fewer values than the sentinels call for.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                try:
                    # ZIP64 extension (large files and/or large archives)
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        self.file_size = counts[idx]
                        idx += 1

                    if self.compress_size == 0xFFFFFFFF:
                        self.compress_size = counts[idx]
                        idx += 1

                    if self.header_offset == 0xffffffff:
                        self.header_offset = counts[idx]
                        idx += 1
                except IndexError:
                    # The record holds fewer values than the sentinel
                    # fields require: report corrupt data rather than
                    # leaking an IndexError to the caller.
                    raise BadZipFile("Corrupt extra field %04x (size=%d)"
                                     % (tp, ln)) from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        # endswith() rather than filename[-1]: an empty name is simply not
        # a directory and must not raise IndexError.
        return self.filename.endswith('/')
510
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000511
class _ZipDecrypter:
    """Decrypt members protected with the password-based ZIP encryption.

    Known plaintext attacks exist against this scheme, but it is still
    useful to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Build the 256-entry CRC-32 lookup table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = [0] * 256
        for index in range(256):
            value = index
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= poly
            table[index] = value
        return table
    # Filled in by the first instance that gets created.
    crctable = None

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        slot = (crc ^ ch) & 0xff
        shifted = (crc >> 8) & 0xffffff
        return shifted ^ self.crctable[slot]

    def __init__(self, pwd):
        """Seed the three keys and mix every item of pwd into them."""
        if _ZipDecrypter.crctable is None:
            _ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable()
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for item in pwd:
            self._UpdateKeys(item)

    def _UpdateKeys(self, c):
        """Advance the three keys using the plaintext value c."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (self.key0 & 0xFF)) & mask32
        key1 = (key1 * 0x08088405 + 1) & mask32
        self.key1 = key1
        self.key2 = self._crc32((key1 >> 24) & 0xFF, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        keystream = ((k * (k ^ 1)) >> 8) & 255
        plain = c ^ keystream
        # The keys evolve with the decrypted value, not the cipher value.
        self._UpdateKeys(plain)
        return plain
571
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200572
class LZMACompressor:
    """Raw LZMA1 compressor whose output starts with a properties header.

    The underlying lzma compressor is created lazily.  Whichever of
    compress() or flush() is called first prepends the header: the bytes
    9 and 4, the 16-bit little-endian length of the encoded filter
    properties, then the properties themselves.
    """

    def __init__(self):
        self._comp = None

    def _init(self):
        """Create the raw compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        header = struct.pack('<BBH', 9, 4, len(props))
        return header + props

    def compress(self, data):
        """Compress data, emitting the header first on the initial call."""
        if self._comp is not None:
            return self._comp.compress(data)
        header = self._init()
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream, emitting the header first if nothing was written."""
        if self._comp is not None:
            return self._comp.flush()
        header = self._init()
        return header + self._comp.flush()
593 return self._comp.flush()
594
595
class LZMADecompressor:
    """Raw LZMA1 decompressor for data that starts with a properties header.

    Input is buffered until the 4-byte prefix, the filter properties whose
    length the prefix announces, and at least one further byte are
    available.  Only then is the underlying lzma decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Return decompressed bytes, or b'' while the header is incomplete."""
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            # Bytes 2-3 of the prefix give the size of the properties.
            (psize,) = struct.unpack('<H', pending[2:4])
            body_start = 4 + psize
            if len(pending) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            # Everything after the header is compressed payload.
            data = pending[body_start:]
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
621 return result
622
623
# Human-readable names keyed by ZIP compression method number.  Used by
# ZipInfo.__repr__ and by the error message of _get_decompressor; it also
# names methods for which this module has no (de)compressor.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
643
def _check_compression(compression):
    """Verify that the given compression method can be used.

    Raises RuntimeError when the method is known but the module that
    implements it could not be imported, and NotImplementedError for any
    other method number.  Returns None otherwise.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if zlib:
            return
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if compression == ZIP_BZIP2:
        if bz2:
            return
        raise RuntimeError(
            "Compression requires the (missing) bz2 module")
    if compression == ZIP_LZMA:
        if lzma:
            return
        raise RuntimeError(
            "Compression requires the (missing) lzma module")
    raise NotImplementedError("That compression method is not supported")
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300660 raise NotImplementedError("That compression method is not supported")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200661
662
def _get_compressor(compress_type):
    """Return a new compressor object for compress_type.

    None is returned for every method other than deflate, bzip2 and LZMA.
    """
    if compress_type == ZIP_DEFLATED:
        # A negative window size asks zlib for a raw stream without header.
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    if compress_type == ZIP_LZMA:
        return LZMACompressor()
    return None
672 return None
673
674
def _get_decompressor(compress_type):
    """Return a new decompressor object for compress_type.

    None is returned for stored members.  Raises NotImplementedError for
    any method other than stored, deflate, bzip2 and LZMA; the message
    includes the method's name when compressor_names knows it.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # A negative window size asks zlib for a raw stream without header.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    descr = compressor_names.get(compress_type)
    if not descr:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
689 raise NotImplementedError("compression type %d" % (compress_type,))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200690
691
class _SharedFile:
    """Reader that keeps its own position in a file object used by others.

    The underlying file is repositioned before every read and the resulting
    position is remembered afterwards, so several readers can take turns on
    the same file object.
    """

    def __init__(self, file, pos, close, lock, writing):
        # close:   callable that receives the file object when we are closed.
        # lock:    context manager held for the duration of each read.
        # writing: callable returning true while a writing handle is open.
        self._file, self._pos = file, pos
        self._close, self._lock = close, lock
        self._writing = writing

    def read(self, n=-1):
        """Read up to n bytes starting at this reader's saved position.

        Raises ValueError while the writing() callable reports an open
        writing handle.
        """
        with self._lock:
            if self._writing():
                raise ValueError("Can't read from the ZIP file while there "
                        "is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            shared = self._file
            shared.seek(self._pos)
            chunk = shared.read(n)
            # Remember where we stopped; other users may move the file.
            self._pos = shared.tell()
            return chunk

    def close(self):
        """Hand the file object to the close callback; later calls do nothing."""
        fileobj = self._file
        if fileobj is None:
            return
        self._file = None
        self._close(fileobj)
716
# Provide the tell method for unseekable stream
class _Tellable:
    """Write-side wrapper that tracks the stream position by itself.

    The position is the running total of the values returned by the wrapped
    object's write() method, starting from zero.
    """

    def __init__(self, fp):
        self.fp = fp
        self.offset = 0

    def write(self, data):
        """Write data to the wrapped stream and advance the tracked offset."""
        written = self.fp.write(data)
        self.offset = self.offset + written
        return written

    def tell(self):
        """Return the number of bytes reported as written so far."""
        return self.offset

    def flush(self):
        """Flush the wrapped stream."""
        self.fp.flush()

    def close(self):
        """Close the wrapped stream."""
        self.fp.close()
736
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200737
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.

    Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # Parenthesised on purpose: "1 << 31 - 1" parses as "1 << 30" because
    # the shift operator binds less tightly than subtraction.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Prepare to read the member described by zipinfo from fileobj.

        fileobj        -- object whose read() yields the member's raw bytes
        mode           -- stored as the public 'mode' attribute
        zipinfo        -- supplies compress_type, compress_size, file_size,
                          filename and (optionally) CRC
        decrypter      -- optional callable applied to every raw byte
        close_fileobj  -- when true, close() also closes fileobj
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size   # raw bytes still to read
        self._left = zipinfo.file_size                # decoded bytes still due

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0          # index of the next unread byte in _readbuffer

        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            # Without a reference value _update_crc() is a no-op.
            self._expected_crc = None

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.  A limit of
        None is treated like a negative limit (no limit), matching read()
        and read1().
        """
        if limit is None:
            limit = -1

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Push the bytes just read back in front of the buffer so the
            # position seen by the caller does not move.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # The request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        # Drain the buffer, then keep reading until n more bytes arrive.
        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than requested: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n=-1):
        """Read up to n bytes with at most one read() system call.

        If n is omitted, None, or negative, the buffered bytes plus the
        result of at most one successful underlying read are returned.
        """

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                # Retry only while a read produced no decoded bytes.
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the member's declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read (and decrypt) raw member bytes: at least MIN_READ_SIZE are
        # requested, but never more than the member still has.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            # The file ended before the member's compressed data did.
            raise EOFError

        if self._decrypter is not None:
            data = bytes(map(self._decrypter, data))
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000965
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000966
class _ZipWriteFile(io.BufferedIOBase):
    """Writable file object for a single member being added to an archive.

    Data passed to write() is compressed (when the member uses compression)
    and written to the owning archive's file object.  close() flushes the
    compressor, records the CRC and sizes on the ZipInfo, writes either a
    trailing data descriptor or a corrected local header, and registers the
    member with the archive.
    """

    def __init__(self, zf, zinfo, zip64):
        """Start writing the member described by zinfo to the archive zf.

        zf     -- owning archive; its fp, start_dir, _writing, filelist and
                  NameToInfo attributes are used
        zinfo  -- member description; updated in place by close()
        zip64  -- true when 64-bit size fields are used for this member
        """
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type)
        self._file_size = 0
        self._compress_size = 0
        self._crc = 0

    @property
    def _fileobj(self):
        # Looked up on every access rather than cached at construction.
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Write data to the member and return the number of bytes accepted.

        Raises ValueError if the member has already been closed.
        """
        if self.closed:
            # Without this check, data written after close() would land in
            # the archive after the member's sizes were already recorded.
            raise ValueError('I/O operation on closed file.')
        nbytes = len(data)
        self._file_size += nbytes
        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        """Finish the member and add it to the archive's member tables.

        Calling close() again is a no-op.  Raises RuntimeError when the
        member outgrew the non-ZIP64 limits it was started with.
        """
        if self.closed:
            # A second close() must not rewrite the header or register the
            # member twice.
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                fmt = '<LQQ' if self._zip64 else '<LLL'
                self._fileobj.write(struct.pack(fmt, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError('File size unexpectedly exceeded ZIP64 '
                                           'limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError('Compressed size unexpectedly exceeded '
                                           'ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            # Always release the archive's "writing" state, even when
            # finishing the member failed.
            self._zipfile._writing = False
1036
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001037class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001038 """ Class with methods to open, read, write, close, list zip files.
1039
Serhiy Storchaka235c5e02013-11-23 15:55:38 +02001040 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +00001041
Fred Drake3d9091e2001-03-26 15:49:24 +00001042 file: Either the path to the file, or a file-like object.
1043 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001044 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1045 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001046 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1047 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001048 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1049 needed, otherwise it will raise an exception when this would
1050 be necessary.
1051
Fred Drake3d9091e2001-03-26 15:49:24 +00001052 """
Fred Drake484d7352000-10-02 21:14:52 +00001053
Fred Drake90eac282001-02-28 05:29:34 +00001054 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001055 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001056
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
    """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
    or append 'a'.

    file may be a str or os.PathLike path, in which case the file is opened
    here, or any other object, which is then used directly as the archive's
    file object.  compression is validated with _check_compression().

    Raises ValueError for an unknown mode.  If anything fails after the
    file object has been obtained, it is handed to self._fpclose() and the
    exception is re-raised.
    """
    if mode not in ('r', 'w', 'x', 'a'):
        raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0 # Level of printing: 0 through 3
    self.NameToInfo = {} # Find file info given name
    self.filelist = [] # List of ZipInfo instances for archive
    self.compression = compression # Method of compression
    self.mode = mode
    self.pwd = None
    self._comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, os.PathLike):
        file = os.fspath(file)
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        # Maps the archive mode to the first file mode to try, and each
        # file mode to the one to fall back to when opening fails, e.g.
        # 'a' -> 'r+b' -> 'w+b' -> 'wb'.
        modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                    'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
        filemode = modeDict[mode]
        while True:
            try:
                self.fp = io.open(file, filemode)
            except OSError:
                if filemode in modeDict:
                    filemode = modeDict[filemode]
                    continue
                # No fallback left for this file mode.
                raise
            break
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    self._fileRefCnt = 1
    self._lock = threading.RLock()
    self._seekable = True
    self._writing = False

    try:
        if mode == 'r':
            self._RealGetContents()
        elif mode in ('w', 'x'):
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
            self._start_disk = 0
            try:
                self.start_dir = self.fp.tell()
            except (AttributeError, OSError):
                # No usable tell(): wrap the stream so positions are
                # tracked by counting written bytes.
                self.fp = _Tellable(self.fp)
                self.start_dir = 0
                self._seekable = False
            else:
                # Some file-like objects can provide tell() but not seek()
                try:
                    self.fp.seek(self.start_dir)
                except (AttributeError, OSError):
                    self._seekable = False
        elif mode == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                self.start_dir = self._start_disk = self.fp.tell()
        else:
            # Defensive only: mode was already validated above.
            raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
    except:
        # Catch everything so the file object is released on any failure;
        # the exception is always re-raised.
        fp = self.fp
        self.fp = None
        self._fpclose(fp)
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001144
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is the target."""
    return self
1147
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    Nothing is returned, so an exception raised inside the block is not
    suppressed.
    """
    self.close()
1150
def __repr__(self):
    """Describe the archive: its file or filename and mode, or '[closed]'."""
    cls = self.__class__
    pieces = ['<%s.%s' % (cls.__module__, cls.__qualname__)]
    if self.fp is None:
        pieces.append(' [closed]')
    else:
        if self._filePassed:
            # Caller supplied the file object: show the object itself.
            pieces.append(' file=%r' % self.fp)
        elif self.filename is not None:
            pieces.append(' filename=%r' % self.filename)
        pieces.append(' mode=%r' % self.mode)
    pieces.append('>')
    return ''.join(pieces)
1164
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record with _EndRecData(), then
    parses every central directory entry into a ZipInfo that is appended
    to self.filelist and stored in self.NameToInfo.  Also sets
    self._comment, self._start_disk and self.start_dir.

    Raises BadZipFile when no end record is found or the central
    directory is truncated or has a bad signature, and
    NotImplementedError when an entry needs a newer extract version than
    MAX_EXTRACT_VERSION.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE] # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET] # offset of central directory
    self._comment = endrec[_ECD_COMMENT] # archive comment

    # self._start_disk: Position of the start of ZIP archive
    # It is zero, unless ZIP was concatenated to another file
    self._start_disk = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        self._start_disk -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = self._start_disk + offset_cd
        print("given, inferred, offset", offset_cd, inferred, self._start_disk)
    # self.start_dir: Position of start of central directory
    self.start_dir = offset_cd + self._start_disk
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # From here on, fp is an in-memory copy of the central directory.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if len(centdir) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Index 5 is the same field that is unpacked into x.flag_bits below.
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        # t and d are the packed time and date, decoded further down.
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                        t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        # Shift the stored offset by the position where the archive starts.
        x.header_offset = x.header_offset + self._start_disk
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001242
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001243
def namelist(self):
    """Return the names of all archive members as a new list."""
    names = []
    for member in self.filelist:
        names.append(member.filename)
    return names
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001247
def infolist(self):
    """Return the list of ZipInfo instances for the archive's members.

    The archive's own list object is returned, not a copy.
    """
    members = self.filelist
    return members
1252
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header line is printed, followed by one line per member giving
    its name, modification time and size.  Output goes to *file*, which
    is passed straight to print().
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001261
1262 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001263 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001264 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001265 for zinfo in self.filelist:
1266 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001267 # Read by chunks, to avoid an OverflowError or a
1268 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001269 with self.open(zinfo.filename, "r") as f:
1270 while f.read(chunk_size): # Check CRC-32
1271 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001272 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001273 return zinfo.filename
1274
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError when the archive has no member with that name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001283
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A false value (None or empty bytes) clears the default password.
    Raises TypeError for a true value that is not a bytes object.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001292
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    current = self._comment
    return current

@comment.setter
def comment(self, comment):
    # Only bytes objects are accepted as archive comments.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
    # check for valid comment length
    too_long = len(comment) > ZIP_MAX_COMMENT
    if too_long:
        import warnings
        # stacklevel=2 attributes the warning to the code assigning the
        # comment rather than to this setter.
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT, stacklevel=2)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    # Mark the archive as changed.
    self._didModify = True
1310
def read(self, name, pwd=None):
    """Return the complete contents of member 'name' as bytes.

    name and pwd are passed on to open(); the member is opened for
    reading and closed again before returning.
    """
    with self.open(name, "r", pwd) as member:
        contents = member.read()
    return contents
Guido van Rossumd8faa362007-04-27 19:54:29 +00001315
Serhiy Storchakaf47fc552016-05-15 12:27:16 +03001316 def open(self, name, mode="r", pwd=None, *, force_zip64=False):
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001317 """Return file-like object for 'name'.
1318
1319 name is a string for the file name within the ZIP file, or a ZipInfo
1320 object.
1321
1322 mode should be 'r' to read a file already in the ZIP file, or 'w' to
1323 write to a file newly added to the archive.
1324
1325 pwd is the password to decrypt files (only used for reading).
1326
1327 When writing, if the file size is not known in advance but may exceed
1328 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
1329 files. If the size is known in advance, it is best to pass a ZipInfo
1330 instance for name, with zinfo.file_size set.
1331 """
Serhiy Storchakae670be22016-06-11 19:32:44 +03001332 if mode not in {"r", "w"}:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001333 raise ValueError('open() requires mode "r" or "w"')
R. David Murray8d855d82010-12-21 21:53:37 +00001334 if pwd and not isinstance(pwd, bytes):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001335 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001336 if pwd and (mode == "w"):
1337 raise ValueError("pwd is only supported for reading files")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001338 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001339 raise ValueError(
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001340 "Attempt to use ZIP archive that was already closed")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001341
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001342 # Make sure we have an info object
1343 if isinstance(name, ZipInfo):
1344 # 'name' is already an info object
1345 zinfo = name
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001346 elif mode == 'w':
1347 zinfo = ZipInfo(name)
1348 zinfo.compress_type = self.compression
Guido van Rossumd8faa362007-04-27 19:54:29 +00001349 else:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001350 # Get info object for name
1351 zinfo = self.getinfo(name)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001352
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001353 if mode == 'w':
1354 return self._open_to_write(zinfo, force_zip64=force_zip64)
1355
1356 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001357 raise ValueError("Can't read from the ZIP file while there "
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001358 "is an open writing handle on it. "
1359 "Close the writing handle before trying to read.")
1360
1361 # Open for reading:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001362 self._fileRefCnt += 1
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001363 zef_file = _SharedFile(self.fp, zinfo.header_offset,
1364 self._fpclose, self._lock, lambda: self._writing)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001365 try:
Antoine Pitrou17babc52012-11-17 23:50:08 +01001366 # Skip the file header:
1367 fheader = zef_file.read(sizeFileHeader)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001368 if len(fheader) != sizeFileHeader:
1369 raise BadZipFile("Truncated file header")
1370 fheader = struct.unpack(structFileHeader, fheader)
1371 if fheader[_FH_SIGNATURE] != stringFileHeader:
Antoine Pitrou17babc52012-11-17 23:50:08 +01001372 raise BadZipFile("Bad magic number for file header")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001373
Antoine Pitrou17babc52012-11-17 23:50:08 +01001374 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
1375 if fheader[_FH_EXTRA_FIELD_LENGTH]:
1376 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001377
Antoine Pitrou8572da52012-11-17 23:52:05 +01001378 if zinfo.flag_bits & 0x20:
1379 # Zip 2.7: compressed patched data
1380 raise NotImplementedError("compressed patched data (flag bit 5)")
Martin v. Löwis2a2ce322012-05-01 08:44:08 +02001381
Antoine Pitrou8572da52012-11-17 23:52:05 +01001382 if zinfo.flag_bits & 0x40:
1383 # strong encryption
1384 raise NotImplementedError("strong encryption (flag bit 6)")
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001385
Antoine Pitrou17babc52012-11-17 23:50:08 +01001386 if zinfo.flag_bits & 0x800:
1387 # UTF-8 filename
1388 fname_str = fname.decode("utf-8")
1389 else:
1390 fname_str = fname.decode("cp437")
Georg Brandl5ba11de2011-01-01 10:09:32 +00001391
Antoine Pitrou17babc52012-11-17 23:50:08 +01001392 if fname_str != zinfo.orig_filename:
1393 raise BadZipFile(
1394 'File name in directory %r and header %r differ.'
1395 % (zinfo.orig_filename, fname))
1396
1397 # check for encrypted flag & handle password
1398 is_encrypted = zinfo.flag_bits & 0x1
1399 zd = None
1400 if is_encrypted:
1401 if not pwd:
1402 pwd = self.pwd
1403 if not pwd:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001404 raise RuntimeError("File %r is encrypted, password "
Antoine Pitrou17babc52012-11-17 23:50:08 +01001405 "required for extraction" % name)
1406
1407 zd = _ZipDecrypter(pwd)
1408 # The first 12 bytes in the cypher stream is an encryption header
1409 # used to strengthen the algorithm. The first 11 bytes are
1410 # completely random, while the 12th contains the MSB of the CRC,
1411 # or the MSB of the file time depending on the header type
1412 # and is used to check the correctness of the password.
1413 header = zef_file.read(12)
1414 h = list(map(zd, header[0:12]))
1415 if zinfo.flag_bits & 0x8:
1416 # compare against the file type from extended local headers
1417 check_byte = (zinfo._raw_time >> 8) & 0xff
1418 else:
1419 # compare against the CRC otherwise
1420 check_byte = (zinfo.CRC >> 24) & 0xff
1421 if h[11] != check_byte:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001422 raise RuntimeError("Bad password for file %r" % name)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001423
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001424 return ZipExtFile(zef_file, mode, zinfo, zd, True)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001425 except:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001426 zef_file.close()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001427 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001428
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001429 def _open_to_write(self, zinfo, force_zip64=False):
1430 if force_zip64 and not self._allowZip64:
1431 raise ValueError(
1432 "force_zip64 is True, but allowZip64 was False when opening "
1433 "the ZIP file."
1434 )
1435 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001436 raise ValueError("Can't write to the ZIP file while there is "
1437 "another write handle open on it. "
1438 "Close the first handle before opening another.")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001439
1440 # Sizes and CRC are overwritten with correct data after processing the file
1441 if not hasattr(zinfo, 'file_size'):
1442 zinfo.file_size = 0
1443 zinfo.compress_size = 0
1444 zinfo.CRC = 0
1445
1446 zinfo.flag_bits = 0x00
1447 if zinfo.compress_type == ZIP_LZMA:
1448 # Compressed data includes an end-of-stream (EOS) marker
1449 zinfo.flag_bits |= 0x02
1450 if not self._seekable:
1451 zinfo.flag_bits |= 0x08
1452
1453 if not zinfo.external_attr:
1454 zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
1455
1456 # Compressed size can be larger than uncompressed size
1457 zip64 = self._allowZip64 and \
1458 (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1459
1460 if self._seekable:
1461 self.fp.seek(self.start_dir)
1462 zinfo.header_offset = self.fp.tell()
1463
1464 self._writecheck(zinfo)
1465 self._didModify = True
1466
1467 self.fp.write(zinfo.FileHeader(zip64))
1468
1469 self._writing = True
1470 return _ZipWriteFile(self, zinfo, zip64)
1471
Christian Heimes790c8232008-01-07 21:14:23 +00001472 def extract(self, member, path=None, pwd=None):
1473 """Extract a member from the archive to the current working directory,
1474 using its full name. Its file information is extracted as accurately
1475 as possible. `member' may be a filename or a ZipInfo object. You can
1476 specify a different directory using `path'.
1477 """
Christian Heimes790c8232008-01-07 21:14:23 +00001478 if path is None:
1479 path = os.getcwd()
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001480 else:
1481 path = os.fspath(path)
Christian Heimes790c8232008-01-07 21:14:23 +00001482
1483 return self._extract_member(member, path, pwd)
1484
1485 def extractall(self, path=None, members=None, pwd=None):
1486 """Extract all members from the archive to the current working
1487 directory. `path' specifies a different directory to extract to.
1488 `members' is optional and must be a subset of the list returned
1489 by namelist().
1490 """
1491 if members is None:
1492 members = self.namelist()
1493
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001494 if path is None:
1495 path = os.getcwd()
1496 else:
1497 path = os.fspath(path)
1498
Christian Heimes790c8232008-01-07 21:14:23 +00001499 for zipinfo in members:
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001500 self._extract_member(zipinfo, path, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001501
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001502 @classmethod
1503 def _sanitize_windows_name(cls, arcname, pathsep):
1504 """Replace bad characters and remove trailing dots from parts."""
1505 table = cls._windows_illegal_name_trans_table
1506 if not table:
1507 illegal = ':<>|"?*'
1508 table = str.maketrans(illegal, '_' * len(illegal))
1509 cls._windows_illegal_name_trans_table = table
1510 arcname = arcname.translate(table)
1511 # remove trailing dots
1512 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1513 # rejoin, removing empty parts.
1514 arcname = pathsep.join(x for x in arcname if x)
1515 return arcname
1516
Christian Heimes790c8232008-01-07 21:14:23 +00001517 def _extract_member(self, member, targetpath, pwd):
1518 """Extract the ZipInfo object 'member' to a physical
1519 file on the path targetpath.
1520 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001521 if not isinstance(member, ZipInfo):
1522 member = self.getinfo(member)
1523
Christian Heimes790c8232008-01-07 21:14:23 +00001524 # build the destination pathname, replacing
1525 # forward slashes to platform specific separators.
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001526 arcname = member.filename.replace('/', os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001527
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001528 if os.path.altsep:
1529 arcname = arcname.replace(os.path.altsep, os.path.sep)
1530 # interpret absolute pathname as relative, remove drive letter or
1531 # UNC path, redundant separators, "." and ".." components.
1532 arcname = os.path.splitdrive(arcname)[1]
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001533 invalid_path_parts = ('', os.path.curdir, os.path.pardir)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001534 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001535 if x not in invalid_path_parts)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001536 if os.path.sep == '\\':
Serhiy Storchakae5e64442013-02-02 19:50:59 +02001537 # filter illegal characters on Windows
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001538 arcname = self._sanitize_windows_name(arcname, os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001539
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001540 targetpath = os.path.join(targetpath, arcname)
Christian Heimes790c8232008-01-07 21:14:23 +00001541 targetpath = os.path.normpath(targetpath)
1542
1543 # Create all upper directories if necessary.
1544 upperdirs = os.path.dirname(targetpath)
1545 if upperdirs and not os.path.exists(upperdirs):
1546 os.makedirs(upperdirs)
1547
Serhiy Storchaka503f9082016-02-08 00:02:25 +02001548 if member.is_dir():
Martin v. Löwis70ccd162009-05-24 19:47:22 +00001549 if not os.path.isdir(targetpath):
1550 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001551 return targetpath
1552
Antoine Pitrou17babc52012-11-17 23:50:08 +01001553 with self.open(member, pwd=pwd) as source, \
1554 open(targetpath, "wb") as target:
1555 shutil.copyfileobj(source, target)
Christian Heimes790c8232008-01-07 21:14:23 +00001556
1557 return targetpath
1558
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001559 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001560 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001561 if zinfo.filename in self.NameToInfo:
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001562 import warnings
1563 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001564 if self.mode not in ('w', 'x', 'a'):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001565 raise ValueError("write() requires mode 'w', 'x', or 'a'")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001566 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001567 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001568 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001569 _check_compression(zinfo.compress_type)
Serhiy Storchakacfbb3942014-09-23 21:34:24 +03001570 if not self._allowZip64:
1571 requires_zip64 = None
1572 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1573 requires_zip64 = "Files count"
1574 elif zinfo.file_size > ZIP64_LIMIT:
1575 requires_zip64 = "Filesize"
1576 elif zinfo.header_offset > ZIP64_LIMIT:
1577 requires_zip64 = "Zipfile size"
1578 if requires_zip64:
1579 raise LargeZipFile(requires_zip64 +
1580 " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001581
1582 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +00001583 """Put the bytes from filename into the archive under the name
1584 arcname."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001585 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001586 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001587 "Attempt to write to ZIP archive that was already closed")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001588 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001589 raise ValueError(
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001590 "Can't write to ZIP archive while an open writing handle exists"
1591 )
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001592
Serhiy Storchaka503f9082016-02-08 00:02:25 +02001593 zinfo = ZipInfo.from_file(filename, arcname)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001594
Serhiy Storchaka503f9082016-02-08 00:02:25 +02001595 if zinfo.is_dir():
1596 zinfo.compress_size = 0
1597 zinfo.CRC = 0
1598 else:
1599 if compress_type is not None:
1600 zinfo.compress_type = compress_type
1601 else:
1602 zinfo.compress_type = self.compression
1603
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001604 if zinfo.is_dir():
1605 with self._lock:
1606 if self._seekable:
1607 self.fp.seek(self.start_dir)
1608 zinfo.header_offset = self.fp.tell() # Start of header bytes
1609 if zinfo.compress_type == ZIP_LZMA:
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001610 # Compressed data includes an end-of-stream (EOS) marker
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001611 zinfo.flag_bits |= 0x02
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001612
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001613 self._writecheck(zinfo)
1614 self._didModify = True
Martin v. Löwis59e47792009-01-24 14:10:07 +00001615
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001616 self.filelist.append(zinfo)
1617 self.NameToInfo[zinfo.filename] = zinfo
1618 self.fp.write(zinfo.FileHeader(False))
1619 self.start_dir = self.fp.tell()
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001620 else:
1621 with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
1622 shutil.copyfileobj(src, dest, 1024*8)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001623
Ronald Oussorenee5c8852010-02-07 20:24:02 +00001624 def writestr(self, zinfo_or_arcname, data, compress_type=None):
Guido van Rossum85825dc2007-08-27 17:03:28 +00001625 """Write a file into the archive. The contents is 'data', which
1626 may be either a 'str' or a 'bytes' instance; if it is a 'str',
1627 it is encoded as UTF-8 first.
1628 'zinfo_or_arcname' is either a ZipInfo instance or
Just van Rossumb083cb32002-12-12 12:23:32 +00001629 the name of the file in the archive."""
Guido van Rossum85825dc2007-08-27 17:03:28 +00001630 if isinstance(data, str):
1631 data = data.encode("utf-8")
Just van Rossumb083cb32002-12-12 12:23:32 +00001632 if not isinstance(zinfo_or_arcname, ZipInfo):
1633 zinfo = ZipInfo(filename=zinfo_or_arcname,
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001634 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001635 zinfo.compress_type = self.compression
Serhiy Storchaka46a34922014-09-23 22:40:23 +03001636 if zinfo.filename[-1] == '/':
1637 zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x
1638 zinfo.external_attr |= 0x10 # MS-DOS directory flag
1639 else:
1640 zinfo.external_attr = 0o600 << 16 # ?rw-------
Just van Rossumb083cb32002-12-12 12:23:32 +00001641 else:
1642 zinfo = zinfo_or_arcname
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001643
1644 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001645 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001646 "Attempt to write to ZIP archive that was already closed")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001647 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001648 raise ValueError(
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001649 "Can't write to ZIP archive while an open writing handle exists."
1650 )
1651
1652 if compress_type is not None:
1653 zinfo.compress_type = compress_type
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001654
Guido van Rossum85825dc2007-08-27 17:03:28 +00001655 zinfo.file_size = len(data) # Uncompressed size
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001656 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001657 with self.open(zinfo, mode='w') as dest:
1658 dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001659
1660 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001661 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001662 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001663
1664 def close(self):
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001665 """Close the file, and for mode 'w', 'x' and 'a' write the ending
Fred Drake484d7352000-10-02 21:14:52 +00001666 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001667 if self.fp is None:
1668 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001669
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001670 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001671 raise ValueError("Can't close the ZIP file while there is "
1672 "an open writing handle on it. "
1673 "Close the writing handle before closing the zip.")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001674
Antoine Pitrou17babc52012-11-17 23:50:08 +01001675 try:
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001676 if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001677 with self._lock:
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001678 if self._seekable:
Serhiy Storchakaa14f7d22015-01-26 14:01:27 +02001679 self.fp.seek(self.start_dir)
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001680 self._write_end_record()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001681 finally:
1682 fp = self.fp
1683 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001684 self._fpclose(fp)
1685
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001686 def _write_end_record(self):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001687 for zinfo in self.filelist: # write central directory
1688 dt = zinfo.date_time
1689 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1690 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1691 extra = []
1692 if zinfo.file_size > ZIP64_LIMIT \
1693 or zinfo.compress_size > ZIP64_LIMIT:
1694 extra.append(zinfo.file_size)
1695 extra.append(zinfo.compress_size)
1696 file_size = 0xffffffff
1697 compress_size = 0xffffffff
1698 else:
1699 file_size = zinfo.file_size
1700 compress_size = zinfo.compress_size
1701
Serhiy Storchaka8793b212016-10-07 22:20:50 +03001702 header_offset = zinfo.header_offset - self._start_disk
1703 if header_offset > ZIP64_LIMIT:
1704 extra.append(header_offset)
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001705 header_offset = 0xffffffff
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001706
1707 extra_data = zinfo.extra
1708 min_version = 0
1709 if extra:
1710 # Append a ZIP64 field to the extra's
1711 extra_data = struct.pack(
1712 '<HH' + 'Q'*len(extra),
1713 1, 8*len(extra), *extra) + extra_data
1714
1715 min_version = ZIP64_VERSION
1716
1717 if zinfo.compress_type == ZIP_BZIP2:
1718 min_version = max(BZIP2_VERSION, min_version)
1719 elif zinfo.compress_type == ZIP_LZMA:
1720 min_version = max(LZMA_VERSION, min_version)
1721
1722 extract_version = max(min_version, zinfo.extract_version)
1723 create_version = max(min_version, zinfo.create_version)
1724 try:
1725 filename, flag_bits = zinfo._encodeFilenameFlags()
1726 centdir = struct.pack(structCentralDir,
1727 stringCentralDir, create_version,
1728 zinfo.create_system, extract_version, zinfo.reserved,
1729 flag_bits, zinfo.compress_type, dostime, dosdate,
1730 zinfo.CRC, compress_size, file_size,
1731 len(filename), len(extra_data), len(zinfo.comment),
1732 0, zinfo.internal_attr, zinfo.external_attr,
1733 header_offset)
1734 except DeprecationWarning:
1735 print((structCentralDir, stringCentralDir, create_version,
1736 zinfo.create_system, extract_version, zinfo.reserved,
1737 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1738 zinfo.CRC, compress_size, file_size,
1739 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1740 0, zinfo.internal_attr, zinfo.external_attr,
1741 header_offset), file=sys.stderr)
1742 raise
1743 self.fp.write(centdir)
1744 self.fp.write(filename)
1745 self.fp.write(extra_data)
1746 self.fp.write(zinfo.comment)
1747
1748 pos2 = self.fp.tell()
1749 # Write end-of-zip-archive record
1750 centDirCount = len(self.filelist)
1751 centDirSize = pos2 - self.start_dir
Serhiy Storchaka8793b212016-10-07 22:20:50 +03001752 centDirOffset = self.start_dir - self._start_disk
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001753 requires_zip64 = None
1754 if centDirCount > ZIP_FILECOUNT_LIMIT:
1755 requires_zip64 = "Files count"
1756 elif centDirOffset > ZIP64_LIMIT:
1757 requires_zip64 = "Central directory offset"
1758 elif centDirSize > ZIP64_LIMIT:
1759 requires_zip64 = "Central directory size"
1760 if requires_zip64:
1761 # Need to write the ZIP64 end-of-archive records
1762 if not self._allowZip64:
1763 raise LargeZipFile(requires_zip64 +
1764 " would require ZIP64 extensions")
1765 zip64endrec = struct.pack(
1766 structEndArchive64, stringEndArchive64,
1767 44, 45, 45, 0, 0, centDirCount, centDirCount,
1768 centDirSize, centDirOffset)
1769 self.fp.write(zip64endrec)
1770
1771 zip64locrec = struct.pack(
1772 structEndArchive64Locator,
1773 stringEndArchive64Locator, 0, pos2, 1)
1774 self.fp.write(zip64locrec)
1775 centDirCount = min(centDirCount, 0xFFFF)
1776 centDirSize = min(centDirSize, 0xFFFFFFFF)
1777 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1778
1779 endrec = struct.pack(structEndArchive, stringEndArchive,
1780 0, 0, centDirCount, centDirCount,
1781 centDirSize, centDirOffset, len(self._comment))
1782 self.fp.write(endrec)
1783 self.fp.write(self._comment)
1784 self.fp.flush()
1785
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001786 def _fpclose(self, fp):
1787 assert self._fileRefCnt > 0
1788 self._fileRefCnt -= 1
1789 if not self._fileRefCnt and not self._filePassed:
1790 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001791
1792
class PyZipFile(ZipFile):
    """ZipFile subclass for building archives of Python modules and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember the 'optimize' level.

        'optimize' selects which compiled file writepy() stores: -1 (the
        default) uses whatever suitable file is present, 0, 1 or 2 request
        that optimization level.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add the Python code found at "pathname" to the ZIP archive.

        If pathname is a package directory (it contains __init__.py), the
        package and, recursively, its sub-packages are searched for *.py
        files and added below basename/<package name>.  If pathname is a
        plain directory, the *.py files directly inside it are added.
        Otherwise pathname must be a single *.py file.  Modules are stored
        as module.pyc, compiling module.py first when necessary.

        If filterfunc is given, it is called with each path about to be
        added; when it returns a false value the file or directory is
        skipped.
        """
        pathname = os.fspath(pathname)
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s %r skipped by filterfunc' % (label, pathname))
            return

        def rejected(path):
            # True when filterfunc vetoes this module file.
            if filterfunc and not filterfunc(path):
                if self.debug:
                    print('file %r skipped by filterfunc' % path)
                return True
            return False

        def add_source(path, arc_base, label="Adding"):
            # Store the compiled form of the *.py file 'path' in the archive.
            fname, arcname = self._get_codename(path[0:-3], arc_base)
            if self.debug:
                print(label, arcname)
            self.write(fname, arcname)

        name = os.path.basename(pathname)

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            add_source(pathname, basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                if os.path.splitext(filename)[1] == ".py" and not rejected(path):
                    add_source(path, basename)
            return

        # This is a package directory, add it
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        add_source(initname, basename)

        dirlist = os.listdir(pathname)
        dirlist.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename,
                                 filterfunc=filterfunc)  # Recursive call
            elif os.path.splitext(filename)[1] == ".py":
                if not rejected(path):
                    add_source(path, basename)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the module path 'pathname'.

        'pathname' is a module path without the ".py" suffix.  The result
        names the file to read from disk and the name to store it under in
        the archive, compiling the source if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).
        When compilation fails the .py source itself is returned.
        """
        def _compile(source, optimize=-1):
            # Byte-compile 'source'; report failure instead of raising.
            import py_compile
            if self.debug:
                print("Compiling", source)
            try:
                py_compile.compile(source, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)

        def _fresh(candidate):
            # An existing compiled file at least as new as the source.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present.  The legacy .pyc is
            # tried first, then the __pycache__ files; every hit is written
            # to the legacy pyc file name in the archive.
            for candidate in (file_pyc, pycache_opt0,
                              pycache_opt1, pycache_opt2):
                if _fresh(candidate):
                    fname = candidate
                    arcname = file_pyc
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    by_level = {0: pycache_opt0, 1: pycache_opt1}
                    fname = by_level.get(sys.flags.optimize, pycache_opt2)
                    arcname = file_pyc
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_opt0
            elif self._optimize == 1:
                fname = pycache_opt1
            elif self._optimize == 2:
                fname = pycache_opt2
            else:
                msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                raise ValueError(msg)
            arcname = file_pyc
            if not _fresh(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py

        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001965
1966
def main(args=None):
    """Run the command line interface of the zipfile module.

    *args* is the list of argument strings handed to argparse; None is
    passed through unchanged, in which case argparse reads sys.argv[1:].

    The options are mutually exclusive:

      -l <zipfile>                print a listing of the archive
      -e <zipfile> <output_dir>   extract the archive into <output_dir>
      -c <name> [<file> ...]      create archive <name> from the sources
      -t <zipfile>                run ZipFile.testzip() on the archive

    When no option is given, the usage message is handed to parser.exit()
    together with exit status 2.
    """
    import argparse

    description = 'A simple command line interface for zipfile module.'
    parser = argparse.ArgumentParser(description=description)
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    group.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    group.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    args = parser.parse_args(args)

    if args.test is not None:
        src = args.test
        with ZipFile(src, 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args.list is not None:
        src = args.list
        with ZipFile(src, 'r') as zf:
            zf.printdir()

    elif args.extract is not None:
        src, curdir = args.extract
        with ZipFile(src, 'r') as zf:
            zf.extractall(curdir)

    elif args.create is not None:
        # The first value names the archive, the remaining ones are the
        # files and directories to store in it.
        zip_name, *files = args.create

        def addToZip(zf, path, zippath):
            # Store *path* under the archive name *zippath*, descending
            # into directories recursively.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                # An empty zippath stands for the archive root, which
                # gets no directory entry of its own.
                if zippath:
                    zf.write(path, zippath)
                # os.listdir() returns names in arbitrary order; sort them
                # so that the member order of the archive is reproducible.
                for nm in sorted(os.listdir(path)):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(zip_name, 'w') as zf:
            for path in files:
                zippath = os.path.basename(path)
                if not zippath:
                    # A trailing separator leaves the basename empty; use
                    # the name of the directory itself instead.
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                addToZip(zf, path, zippath)

    else:
        parser.exit(2, parser.format_usage())
2029
# Run the command line interface when the module is executed as a script.
if __name__ == "__main__":
    main()