blob: 988f39ed1b13d07dcc0f7fbdbc98edc831dec700 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Brett Cannonb57a0852013-06-15 17:32:30 -04008import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import sys
10import time
11import stat
12import shutil
13import struct
14import binascii
15
Serhiy Storchaka9e777732015-10-10 19:43:32 +030016try:
17 import threading
18except ImportError:
19 import dummy_threading as threading
Guido van Rossum32abe6f2000-03-31 17:30:02 +000020
21try:
Tim Peterse1190062001-01-15 03:34:38 +000022 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000023 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040024except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000025 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000026 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000027
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020028try:
29 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040030except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020031 bz2 = None
32
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020033try:
34 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040035except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020036 lzma = None
37
# Names exported by "from zipfile import *".  "BadZipfile" and "error" are
# the legacy spellings that this module binds to the BadZipFile class.
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000041
class BadZipFile(Exception):
    """Raised when an archive is malformed or uses an unsupported layout."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +000044
45
class LargeZipFile(Exception):
    """
    Raised while writing a zipfile when the data would need the ZIP64
    extensions but those extensions have not been enabled.
    """
51
# Both legacy names are bound to the very same class object as BadZipFile,
# so "except error:" and "except BadZipfile:" keep catching it.
error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names
53
Guido van Rossum32abe6f2000-03-31 17:30:02 +000054
# Sizes above ZIP64_LIMIT (2**31 - 1) make ZipInfo.FileHeader() use the
# ZIP64 extra field (or raise LargeZipFile when ZIP64 is refused).
ZIP64_LIMIT = (1 << 31) - 1
# Largest value that fits in an unsigned 16-bit field (2**16 - 1).
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "Version needed to extract" values.  DEFAULT_VERSION is what a new ZipInfo
# starts with; ZipInfo.FileHeader() raises it to the value below that matches
# the feature (ZIP64, bzip2, LZMA) actually used by the member.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020072
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)
#
# Every format string starts with "<": little-endian, standard sizes, no
# padding.  The _XXX_* names that follow each format are positions in the
# tuple returned by struct.unpack() for that format.

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData appends the comment bytes and the record's file offset).
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
165
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    *fp* must be a seekable binary file object.  An OSError raised while
    probing the file is treated as "not a ZIP file".
    """
    try:
        endrec = _EndRecData(fp)
    except OSError:
        return False
    # _EndRecData yields a (non-empty) list on success and None otherwise.
    return bool(endrec)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000173
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path, or an already opened file or
    file-like object (anything with a "read" attribute).  Returns False
    when the file cannot be opened or read (OSError).
    """
    try:
        if hasattr(filename, "read"):
            # Caller supplied a file object: probe it, do not close it.
            return _check_zipfile(fp=filename)
        with open(filename, "rb") as fp:
            return _check_zipfile(fp)
    except OSError:
        return False
189
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    *offset* is the position of the classic end-of-central-directory record
    relative to the end of the file (it is used with whence=2).  *endrec* is
    returned unchanged when no well-formed ZIP64 locator/record precedes it;
    otherwise its leading fields are overwritten in place with the ZIP64
    values.  Raises BadZipFile for archives spanning multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, _reloff, disks = struct.unpack(structEndArchive64Locator,
                                                  locator)
    if magic != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks == 1):
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data': the record sits right before the
    # locator.
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    fields = struct.unpack(structEndArchive64, record)
    if fields[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = fields[_CD64_SIGNATURE]
    endrec[_ECD_DISK_NUMBER] = fields[_CD64_DISK_NUMBER]
    endrec[_ECD_DISK_START] = fields[_CD64_DISK_NUMBER_START]
    endrec[_ECD_ENTRIES_THIS_DISK] = fields[_CD64_NUMBER_ENTRIES_THIS_DISK]
    endrec[_ECD_ENTRIES_TOTAL] = fields[_CD64_NUMBER_ENTRIES_TOTAL]
    endrec[_ECD_SIZE] = fields[_CD64_DIRECTORY_SIZE]
    endrec[_ECD_OFFSET] = fields[_CD64_OFFSET_START_CENTDIR]
    return endrec
231
232
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: with no archive comment the "end of central directory"
    # structure is the very last thing in the file.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    has_bare_record = (len(tail) == sizeEndCentDir
                       and tail[0:4] == stringEndArchive
                       and tail[-2:] == b"\000\000")
    if has_bare_record:
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, tail))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    search_start = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(search_start, 0)
    window = fpin.read()
    sig_at = window.rfind(stringEndArchive)
    if sig_at < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    raw_record = window[sig_at:sig_at + sizeEndCentDir]
    if len(raw_record) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, raw_record))
    comment_len = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    comment_at = sig_at + sizeEndCentDir
    endrec.append(window[comment_at:comment_at + comment_len])
    endrec.append(search_start + sig_at)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, search_start + sig_at - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000292
Fred Drake484d7352000-10-02 21:14:52 +0000293
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename is the member name; it is cut at the first NUL character
        and os.sep is replaced by "/".  date_time is a sequence
        (year, month, day, hour, min, sec).

        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Start the sizes at zero so that repr() works on an instance that
        # has not been filled in yet (it reads both attributes).
        self.compress_size = 0          # Size of the compressed file
        self.file_size = 0              # Size of the uncompressed file
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file

    def __repr__(self):
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The upper 16 bits hold the Unix mode (see from_file); the lower
        # 16 bits are shown raw.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file (local) header as a bytes object.

        zip64 selects whether the ZIP64 extra field is written: True forces
        it, False forbids it, None decides from the sizes.  As a side effect
        extract_version and create_version are raised to the minimum the
        used features require.

        Raises LargeZipFile if a size exceeds ZIP64_LIMIT while zip64 is
        false.
        """
        dt = self.date_time
        # Pack the timestamp into the two 16-bit MS-DOS fields.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        ASCII is used when possible; otherwise the name is encoded as UTF-8
        and bit 0x800 is added to the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 extra field (id 0x0001) found in self.extra.

        For every size/offset attribute holding the 0xffffffff placeholder
        the real 64-bit value is taken, in order, from the ZIP64 field.
        Other extra fields are skipped.

        Raises BadZipFile if a field is truncated or the ZIP64 field does
        not hold enough values.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                # Consume the values in order; running out of them means the
                # field is shorter than the placeholders in the header demand.
                values = iter(counts)
                try:
                    # ZIP64 extension (large files and/or large archives)
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        self.file_size = next(values)

                    if self.compress_size == 0xFFFFFFFF:
                        self.compress_size = next(values)

                    if self.header_offset == 0xffffffff:
                        self.header_offset = next(values)
                except StopIteration:
                    raise BadZipFile(
                        "Corrupt zip64 extra field %04x (size=%d)" % (tp, ln)
                    ) from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        # Stop when the name is exhausted: a path made only of separators
        # (e.g. the filesystem root) would otherwise index an empty string.
        while arcname and arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        # endswith() is also safe for an empty member name.
        return self.filename.endswith('/')
510
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000511
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300512# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
513# internal keys. We noticed that a direct implementation is faster than
514# relying on binascii.crc32().
Thomas Wouterscf297e42007-02-23 15:07:44 +0000515
# Filled in on first use by _ZipDecrypter.
_crctable = None
def _gen_crc(crc):
    """Return *crc* after eight shift/xor rounds with polynomial 0xEDB88320."""
    for _ in range(8):
        low_bit = crc & 1
        crc >>= 1
        if low_bit:
            crc ^= 0xEDB88320
    return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000524
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300525# ZIP supports a password-based form of encryption. Even though known
526# plaintext attacks have been found against it, it is still useful
527# to be able to get data out of such a file.
528#
529# Usage:
530# zd = _ZipDecrypter(mypwd)
531# plain_bytes = zd(cypher_bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000532
def _ZipDecrypter(pwd):
    """Return a function that decrypts successive chunks of bytes.

    *pwd* is iterated as integers (e.g. a bytes object) to seed the three
    keys.  The returned callable keeps updating those keys, so chunks must
    be passed to it in stream order.
    """
    key0 = 0x12345678
    key1 = 0x23456789
    key2 = 0x34567890

    # Build the shared 256-entry table only once per process.
    global _crctable
    if _crctable is None:
        _crctable = [_gen_crc(i) for i in range(256)]
    table = _crctable

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ table[(crc ^ ch) & 0xFF]

    def update_keys(c):
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        # Same value as masking after the addition and again after the
        # multiplication: everything is reduced modulo 2**32.
        key1 = ((key1 + (key0 & 0xFF)) * 0x08088405 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    for byte in pwd:
        update_keys(byte)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        for byte in data:
            k = key2 | 2
            byte ^= ((k * (k ^ 1)) >> 8) & 0xFF
            # The keys advance on the decrypted byte.
            update_keys(byte)
            plain.append(byte)
        return bytes(plain)

    return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000569
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200570
class LZMACompressor:
    """Raw LZMA1 compressor whose output starts with a small header.

    The first bytes produced are struct '<BBH' (9, 4, len(props)) followed
    by the encoded filter properties; the raw LZMA stream comes after.
    """

    def __init__(self):
        # Created lazily so the header is emitted with the first output.
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*, prefixing the header on the first call."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream, prefixing the header if nothing was written."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.flush()
592
593
class LZMADecompressor:
    """Decompressor for the header-prefixed raw LZMA1 stream.

    Input is buffered until the 4-byte header (the uint16 at bytes 2-3 is
    the properties length), the properties and at least one more byte have
    arrived; only then is the real decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Return decompressed bytes for *data* (b'' while still buffering)."""
        if self._decomp is None:
            pending = self._unconsumed + data
            self._unconsumed = pending
            if len(pending) <= 4:
                return b''
            (props_len,) = struct.unpack('<H', pending[2:4])
            body_start = 4 + props_len
            if len(pending) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[body_start:]
            # The buffer is not needed once the decompressor exists.
            del self._unconsumed

        output = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return output
620
621
# Display names for ZIP compression method numbers.  Used only for
# messages: ZipInfo.__repr__ and the NotImplementedError raised by
# _get_decompressor look a method up here.  Being listed does not mean the
# method is supported by this module.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
641
def _check_compression(compression):
    """Validate that *compression* can be used in this interpreter.

    Returns None on success.  Raises RuntimeError when the method is known
    but its module failed to import, and NotImplementedError for any other
    method number.
    """
    if compression == ZIP_STORED:
        return
    # (method, backing module or None, error message when it is missing)
    backends = (
        (ZIP_DEFLATED, zlib,
         "Compression requires the (missing) zlib module"),
        (ZIP_BZIP2, bz2,
         "Compression requires the (missing) bz2 module"),
        (ZIP_LZMA, lzma,
         "Compression requires the (missing) lzma module"),
    )
    for method, module, message in backends:
        if compression == method:
            if not module:
                raise RuntimeError(message)
            return
    raise NotImplementedError("That compression method is not supported")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200659
660
def _get_compressor(compress_type):
    """Return a new compressor object for *compress_type*.

    None is returned for every method without a compressor here, which
    includes ZIP_STORED.
    """
    if compress_type == ZIP_DEFLATED:
        # Negative window bits: raw deflate stream without zlib header.
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    if compress_type == ZIP_LZMA:
        return LZMACompressor()
    return None
671
672
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    ZIP_STORED needs none, so None is returned for it.  Raises
    NotImplementedError for any other method; the message includes the
    method's name when compressor_names knows it.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    method_name = compressor_names.get(compress_type)
    if not method_name:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError("compression type %d (%s)" % (compress_type, method_name))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200688
689
class _SharedFile:
    """Reader over a file object that keeps its own read position.

    Every read() seeks the underlying file to this reader's saved position
    while holding *lock*, so the underlying file's position may be moved by
    others between calls.
    """

    def __init__(self, file, pos, close, lock, writing):
        # file:    underlying file object (needs seek, read and tell)
        # pos:     position from which the first read() starts
        # close:   callable invoked with the file object by close()
        # lock:    context manager held for the duration of each read()
        # writing: zero-argument callable; a true result makes read() fail
        self._file = file
        self._pos = pos
        self._close = close
        self._lock = lock
        self._writing = writing

    def read(self, n=-1):
        """Read up to *n* bytes starting at this reader's own position.

        Raises ValueError if the writing() callable reports true.
        """
        with self._lock:
            if self._writing():
                raise ValueError("Can't read from the ZIP file while there "
                        "is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            self._file.seek(self._pos)
            data = self._file.read(n)
            # Remember where this reader stopped for the next call.
            self._pos = self._file.tell()
            return data

    def close(self):
        """Hand the file object to the close callback; later calls do nothing."""
        if self._file is not None:
            fileobj = self._file
            self._file = None
            self._close(fileobj)
714
# Provide the tell method for unseekable stream
class _Tellable:
    """Write-only wrapper that tracks how many bytes have been written."""

    def __init__(self, fp):
        self.fp = fp
        self.offset = 0     # Sum of the values returned by fp.write() so far

    def write(self, data):
        """Write *data* to the wrapped stream and return its write() result."""
        n = self.fp.write(data)
        self.offset += n
        return n

    def tell(self):
        """Return the running total of bytes written through this wrapper."""
        return self.offset

    def flush(self):
        self.fp.flush()

    def close(self):
        self.fp.close()
734
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200735
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # Parenthesized on purpose: "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Set up reading of the member described by *zipinfo* from *fileobj*.

        decrypter, if given, is a callable applied to every raw chunk read.
        close_fileobj makes close() also close *fileobj*.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Put the bytes just read back in front of the buffer so the
            # position is effectively unchanged.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # The request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than requested: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                # Stop at the first non-empty chunk.
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the member's declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read between MIN_READ_SIZE and n raw bytes (capped at what is left
        # of the member) from the file object, decrypting them if needed.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            # Compressed bytes are still expected but the file has run out.
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        """Close this reader, and the file object too if close_fileobj was set."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000963
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000964
class _ZipWriteFile(io.BufferedIOBase):
    """Writable file object that streams one new member into a ZipFile.

    Sizes and the CRC-32 are accumulated while writing; close() records them
    on the ZipInfo, writes them to the archive and registers the member with
    the owning ZipFile.
    """

    def __init__(self, zf, zinfo, zip64):
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type)
        self._file_size = 0         # Uncompressed bytes accepted so far
        self._compress_size = 0     # Compressed bytes emitted so far
        self._crc = 0

    @property
    def _fileobj(self):
        # Looked up on every access rather than cached.
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Write *data* (any bytes-like object); return the bytes consumed.

        Raises ValueError if this file is already closed.
        """
        if self.closed:
            raise ValueError('I/O operation on closed file.')

        # Accept any object supporting the buffer protocol.  len() of such an
        # object counts items, not bytes, so use the memoryview byte count.
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes

        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        """Finish the member: flush, write sizes/CRC, register it.

        Raises RuntimeError if the member outgrew the non-ZIP64 limits when
        ZIP64 was not enabled for it.  The owning ZipFile's _writing flag is
        cleared even if finishing fails.
        """
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                fmt = '<LQQ' if self._zip64 else '<LLL'
                self._fileobj.write(struct.pack(fmt, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError('File size unexpectedly exceeded ZIP64 '
                                           'limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError('Compressed size unexpectedly exceeded '
                                           'ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            # Always release the archive's writing flag, even on failure,
            # so the ZipFile is not left permanently marked as being written.
            self._zipfile._writing = False
1038
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001039class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001040 """ Class with methods to open, read, write, close, list zip files.
1041
Serhiy Storchaka235c5e02013-11-23 15:55:38 +02001042 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +00001043
Fred Drake3d9091e2001-03-26 15:49:24 +00001044 file: Either the path to the file, or a file-like object.
1045 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001046 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1047 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001048 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1049 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001050 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1051 needed, otherwise it will raise an exception when this would
1052 be necessary.
1053
Fred Drake3d9091e2001-03-26 15:49:24 +00001054 """
Fred Drake484d7352000-10-02 21:14:52 +00001055
Fred Drake90eac282001-02-28 05:29:34 +00001056 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001057 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001058
Serhiy Storchaka235c5e02013-11-23 15:55:38 +02001059 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001060 """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
1061 or append 'a'."""
1062 if mode not in ('r', 'w', 'x', 'a'):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001063 raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001064
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001065 _check_compression(compression)
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001066
1067 self._allowZip64 = allowZip64
1068 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +00001069 self.debug = 0 # Level of printing: 0 through 3
1070 self.NameToInfo = {} # Find file info given name
1071 self.filelist = [] # List of ZipInfo instances for archive
1072 self.compression = compression # Method of compression
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001073 self.mode = mode
Thomas Wouterscf297e42007-02-23 15:07:44 +00001074 self.pwd = None
R David Murrayf50b38a2012-04-12 18:44:58 -04001075 self._comment = b''
Tim Petersa19a1682001-03-29 04:36:09 +00001076
Fred Drake3d9091e2001-03-26 15:49:24 +00001077 # Check if we were passed a file-like object
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001078 if isinstance(file, os.PathLike):
1079 file = os.fspath(file)
Guido van Rossum3172c5d2007-10-16 18:12:55 +00001080 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001081 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +00001082 self._filePassed = 0
1083 self.filename = file
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001084 modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
1085 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001086 filemode = modeDict[mode]
1087 while True:
1088 try:
1089 self.fp = io.open(file, filemode)
1090 except OSError:
1091 if filemode in modeDict:
1092 filemode = modeDict[filemode]
1093 continue
Thomas Wouterscf297e42007-02-23 15:07:44 +00001094 raise
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001095 break
Fred Drake3d9091e2001-03-26 15:49:24 +00001096 else:
1097 self._filePassed = 1
1098 self.fp = file
1099 self.filename = getattr(file, 'name', None)
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001100 self._fileRefCnt = 1
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001101 self._lock = threading.RLock()
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001102 self._seekable = True
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001103 self._writing = False
Tim Petersa19a1682001-03-29 04:36:09 +00001104
Antoine Pitrou17babc52012-11-17 23:50:08 +01001105 try:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001106 if mode == 'r':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001107 self._RealGetContents()
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001108 elif mode in ('w', 'x'):
Georg Brandl268e4d42010-10-14 06:59:45 +00001109 # set the modified flag so central directory gets written
1110 # even if no files are added to the archive
1111 self._didModify = True
Serhiy Storchaka34cba332017-01-01 19:00:30 +02001112 self._start_disk = 0
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001113 try:
Serhiy Storchaka34cba332017-01-01 19:00:30 +02001114 self.start_dir = self.fp.tell()
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001115 except (AttributeError, OSError):
1116 self.fp = _Tellable(self.fp)
Serhiy Storchaka34cba332017-01-01 19:00:30 +02001117 self.start_dir = 0
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001118 self._seekable = False
1119 else:
1120 # Some file-like objects can provide tell() but not seek()
1121 try:
1122 self.fp.seek(self.start_dir)
1123 except (AttributeError, OSError):
1124 self._seekable = False
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001125 elif mode == 'a':
Antoine Pitrou17babc52012-11-17 23:50:08 +01001126 try:
1127 # See if file is a zip file
1128 self._RealGetContents()
1129 # seek to start of directory and overwrite
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001130 self.fp.seek(self.start_dir)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001131 except BadZipFile:
1132 # file is not a zip file, just append
1133 self.fp.seek(0, 2)
1134
1135 # set the modified flag so central directory gets written
1136 # even if no files are added to the archive
1137 self._didModify = True
Serhiy Storchaka8793b212016-10-07 22:20:50 +03001138 self.start_dir = self._start_disk = self.fp.tell()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001139 else:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001140 raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
Antoine Pitrou17babc52012-11-17 23:50:08 +01001141 except:
1142 fp = self.fp
1143 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001144 self._fpclose(fp)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001145 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001146
Ezio Melottifaa6b7f2009-12-30 12:34:59 +00001147 def __enter__(self):
1148 return self
1149
1150 def __exit__(self, type, value, traceback):
1151 self.close()
1152
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001153 def __repr__(self):
1154 result = ['<%s.%s' % (self.__class__.__module__,
1155 self.__class__.__qualname__)]
1156 if self.fp is not None:
1157 if self._filePassed:
1158 result.append(' file=%r' % self.fp)
1159 elif self.filename is not None:
1160 result.append(' filename=%r' % self.filename)
1161 result.append(' mode=%r' % self.mode)
1162 else:
1163 result.append(' [closed]')
1164 result.append('>')
1165 return ''.join(result)
1166
Tim Peters7d3bad62001-04-04 18:56:49 +00001167 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +00001168 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001169 fp = self.fp
Georg Brandl268e4d42010-10-14 06:59:45 +00001170 try:
1171 endrec = _EndRecData(fp)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001172 except OSError:
Georg Brandl4d540882010-10-28 06:42:33 +00001173 raise BadZipFile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001174 if not endrec:
Georg Brandl4d540882010-10-28 06:42:33 +00001175 raise BadZipFile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001176 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001177 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001178 size_cd = endrec[_ECD_SIZE] # bytes in central directory
1179 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
R David Murrayf50b38a2012-04-12 18:44:58 -04001180 self._comment = endrec[_ECD_COMMENT] # archive comment
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001181
Serhiy Storchaka8793b212016-10-07 22:20:50 +03001182 # self._start_disk: Position of the start of ZIP archive
1183 # It is zero, unless ZIP was concatenated to another file
1184 self._start_disk = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +00001185 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1186 # If Zip64 extension structures are present, account for them
Serhiy Storchaka8793b212016-10-07 22:20:50 +03001187 self._start_disk -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001188
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001189 if self.debug > 2:
Serhiy Storchaka8793b212016-10-07 22:20:50 +03001190 inferred = self._start_disk + offset_cd
1191 print("given, inferred, offset", offset_cd, inferred, self._start_disk)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001192 # self.start_dir: Position of start of central directory
Serhiy Storchaka8793b212016-10-07 22:20:50 +03001193 self.start_dir = offset_cd + self._start_disk
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001194 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001195 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001196 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001197 total = 0
1198 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001199 centdir = fp.read(sizeCentralDir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001200 if len(centdir) != sizeCentralDir:
1201 raise BadZipFile("Truncated central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001202 centdir = struct.unpack(structCentralDir, centdir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001203 if centdir[_CD_SIGNATURE] != stringCentralDir:
1204 raise BadZipFile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001205 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001206 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +00001207 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001208 flags = centdir[5]
1209 if flags & 0x800:
1210 # UTF-8 file names extension
1211 filename = filename.decode('utf-8')
1212 else:
1213 # Historical ZIP filename encoding
1214 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001215 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001216 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +00001217 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1218 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001219 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001220 (x.create_version, x.create_system, x.extract_version, x.reserved,
Christian Tismer59202e52013-10-21 03:59:23 +02001221 x.flag_bits, x.compress_type, t, d,
1222 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
Martin v. Löwisd099b562012-05-01 14:08:22 +02001223 if x.extract_version > MAX_EXTRACT_VERSION:
1224 raise NotImplementedError("zip file version %.1f" %
1225 (x.extract_version / 10))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001226 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1227 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +00001228 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001229 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Christian Tismer59202e52013-10-21 03:59:23 +02001230 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001231
1232 x._decodeExtra()
Serhiy Storchaka8793b212016-10-07 22:20:50 +03001233 x.header_offset = x.header_offset + self._start_disk
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001234 self.filelist.append(x)
1235 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001236
1237 # update total bytes read from central directory
1238 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1239 + centdir[_CD_EXTRA_FIELD_LENGTH]
1240 + centdir[_CD_COMMENT_LENGTH])
1241
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001242 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001243 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001244
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001245
1246 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001247 """Return a list of file names in the archive."""
Ezio Melotti006917e2012-04-16 21:34:24 -06001248 return [data.filename for data in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001249
1250 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001251 """Return a list of class ZipInfo instances for files in the
1252 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001253 return self.filelist
1254
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001255 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +00001256 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001257 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
1258 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001259 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001260 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001261 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1262 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001263
1264 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001265 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001266 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001267 for zinfo in self.filelist:
1268 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001269 # Read by chunks, to avoid an OverflowError or a
1270 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001271 with self.open(zinfo.filename, "r") as f:
1272 while f.read(chunk_size): # Check CRC-32
1273 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001274 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001275 return zinfo.filename
1276
1277 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +00001278 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001279 info = self.NameToInfo.get(name)
1280 if info is None:
1281 raise KeyError(
1282 'There is no item named %r in the archive' % name)
1283
1284 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001285
Thomas Wouterscf297e42007-02-23 15:07:44 +00001286 def setpassword(self, pwd):
1287 """Set default password for encrypted files."""
R. David Murray8d855d82010-12-21 21:53:37 +00001288 if pwd and not isinstance(pwd, bytes):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001289 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
R. David Murray8d855d82010-12-21 21:53:37 +00001290 if pwd:
1291 self.pwd = pwd
1292 else:
1293 self.pwd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001294
R David Murrayf50b38a2012-04-12 18:44:58 -04001295 @property
1296 def comment(self):
1297 """The comment text associated with the ZIP file."""
1298 return self._comment
1299
1300 @comment.setter
1301 def comment(self, comment):
1302 if not isinstance(comment, bytes):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001303 raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
R David Murrayf50b38a2012-04-12 18:44:58 -04001304 # check for valid comment length
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001305 if len(comment) > ZIP_MAX_COMMENT:
1306 import warnings
1307 warnings.warn('Archive comment is too long; truncating to %d bytes'
1308 % ZIP_MAX_COMMENT, stacklevel=2)
R David Murrayf50b38a2012-04-12 18:44:58 -04001309 comment = comment[:ZIP_MAX_COMMENT]
1310 self._comment = comment
1311 self._didModify = True
1312
Thomas Wouterscf297e42007-02-23 15:07:44 +00001313 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +00001314 """Return file bytes (as a string) for name."""
Benjamin Petersond285bdb2010-10-31 17:57:22 +00001315 with self.open(name, "r", pwd) as fp:
1316 return fp.read()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001317
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Return file-like object for 'name'.

    name is a string for the file name within the ZIP file, or a ZipInfo
    object.

    mode should be 'r' to read a file already in the ZIP file, or 'w' to
    write to a file newly added to the archive.

    pwd is the password to decrypt files (only used for reading).

    When writing, if the file size is not known in advance but may exceed
    2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
    files.  If the size is known in advance, it is best to pass a ZipInfo
    instance for name, with zinfo.file_size set.

    Raises ValueError for an unsupported mode, for a password given in
    write mode, for a closed archive, or when reading while a write handle
    is open; TypeError for a non-bytes password; BadZipFile when the local
    file header is truncated, has a bad signature, or names a different
    file than the central directory; NotImplementedError for members using
    flag bit 5 or 6; RuntimeError when an encrypted member has no password
    or the password check byte does not match.
    """
    # Argument validation happens before the archive is touched.
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    if pwd and (mode == "w"):
        raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    # Make sure we have an info object
    if isinstance(name, ZipInfo):
        # 'name' is already an info object
        zinfo = name
    elif mode == 'w':
        # New member: build a fresh info object using the archive's
        # default compression.
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
    else:
        # Get info object for name
        zinfo = self.getinfo(name)

    if mode == 'w':
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                "is an open writing handle on it. "
                "Close the writing handle before trying to read.")

    # Open for reading:
    # The reader holds its own reference on the underlying file; it is
    # released through self._fpclose when the shared file is closed.
    self._fileRefCnt += 1
    zef_file = _SharedFile(self.fp, zinfo.header_offset,
                           self._fpclose, self._lock, lambda: self._writing)
    try:
        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, fheader)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        # The file name is kept for the consistency check below; the
        # extra field is read only to advance past it.
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        # The local header must name the same file as the central
        # directory entry.  The message reports the raw header bytes.
        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            # An explicit pwd wins; otherwise fall back to the archive
            # default set via setpassword().
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = zd(header[0:12])
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file %r" % name)

        return ZipExtFile(zef_file, mode, zinfo, zd, True)
    except:
        # Any failure (including BaseException) must close the shared
        # file before the exception propagates unchanged.
        zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001430
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001431 def _open_to_write(self, zinfo, force_zip64=False):
1432 if force_zip64 and not self._allowZip64:
1433 raise ValueError(
1434 "force_zip64 is True, but allowZip64 was False when opening "
1435 "the ZIP file."
1436 )
1437 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001438 raise ValueError("Can't write to the ZIP file while there is "
1439 "another write handle open on it. "
1440 "Close the first handle before opening another.")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001441
1442 # Sizes and CRC are overwritten with correct data after processing the file
1443 if not hasattr(zinfo, 'file_size'):
1444 zinfo.file_size = 0
1445 zinfo.compress_size = 0
1446 zinfo.CRC = 0
1447
1448 zinfo.flag_bits = 0x00
1449 if zinfo.compress_type == ZIP_LZMA:
1450 # Compressed data includes an end-of-stream (EOS) marker
1451 zinfo.flag_bits |= 0x02
1452 if not self._seekable:
1453 zinfo.flag_bits |= 0x08
1454
1455 if not zinfo.external_attr:
1456 zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
1457
1458 # Compressed size can be larger than uncompressed size
1459 zip64 = self._allowZip64 and \
1460 (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1461
1462 if self._seekable:
1463 self.fp.seek(self.start_dir)
1464 zinfo.header_offset = self.fp.tell()
1465
1466 self._writecheck(zinfo)
1467 self._didModify = True
1468
1469 self.fp.write(zinfo.FileHeader(zip64))
1470
1471 self._writing = True
1472 return _ZipWriteFile(self, zinfo, zip64)
1473
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the path it was written to.

    `member' may be a filename or a ZipInfo object.  The member is
    extracted under its full name into `path', or into the current
    working directory when `path' is None.  `pwd' is passed through to
    the member extraction.
    """
    destination = os.getcwd() if path is None else os.fspath(path)
    return self._extract_member(member, destination, pwd)
1486
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members of the archive.

    `members' is optional and must be a subset of the list returned by
    namelist(); when omitted, every member is extracted.  `path' is the
    directory to extract into and defaults to the current working
    directory.  `pwd' is passed through to each member extraction.
    """
    selected = self.namelist() if members is None else members
    destination = os.getcwd() if path is None else os.fspath(path)

    for entry in selected:
        self._extract_member(entry, destination, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001503
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001504 @classmethod
1505 def _sanitize_windows_name(cls, arcname, pathsep):
1506 """Replace bad characters and remove trailing dots from parts."""
1507 table = cls._windows_illegal_name_trans_table
1508 if not table:
1509 illegal = ':<>|"?*'
1510 table = str.maketrans(illegal, '_' * len(illegal))
1511 cls._windows_illegal_name_trans_table = table
1512 arcname = arcname.translate(table)
1513 # remove trailing dots
1514 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1515 # rejoin, removing empty parts.
1516 arcname = pathsep.join(x for x in arcname if x)
1517 return arcname
1518
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

    'member' may also be a member name, which is resolved with getinfo().
    The archive name is sanitised before being joined to targetpath:
    drive/UNC prefixes, empty components, "." and ".." are removed, and on
    Windows illegal characters are replaced.  Returns the path of the
    created file or directory.
    """
    if not isinstance(member, ZipInfo):
        member = self.getinfo(member)

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    invalid_path_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                               if x not in invalid_path_parts)
    if os.path.sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, os.path.sep)

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.  exist_ok=True closes the
    # window between the existence check and the creation, so a concurrent
    # extraction creating the same directory no longer raises.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs, exist_ok=True)

    if member.is_dir():
        if not os.path.isdir(targetpath):
            try:
                os.mkdir(targetpath)
            except FileExistsError:
                # Someone else may have created the directory in the
                # meantime; only a non-directory in the way is an error.
                if not os.path.isdir(targetpath):
                    raise
        return targetpath

    with self.open(member, pwd=pwd) as source, \
         open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1560
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001561 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001562 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001563 if zinfo.filename in self.NameToInfo:
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001564 import warnings
1565 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001566 if self.mode not in ('w', 'x', 'a'):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001567 raise ValueError("write() requires mode 'w', 'x', or 'a'")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001568 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001569 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001570 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001571 _check_compression(zinfo.compress_type)
Serhiy Storchakacfbb3942014-09-23 21:34:24 +03001572 if not self._allowZip64:
1573 requires_zip64 = None
1574 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1575 requires_zip64 = "Files count"
1576 elif zinfo.file_size > ZIP64_LIMIT:
1577 requires_zip64 = "Filesize"
1578 elif zinfo.header_offset > ZIP64_LIMIT:
1579 requires_zip64 = "Zipfile size"
1580 if requires_zip64:
1581 raise LargeZipFile(requires_zip64 +
1582 " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001583
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    Directories are stored as a header-only entry; regular files are
    streamed through a write handle using compress_type, or the archive's
    default compression when compress_type is None.  Raises ValueError if
    the archive is closed or a write handle is already open.
    """
    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname)

    if not zinfo.is_dir():
        # Regular file: choose the compression, then copy the data across.
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type
        with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
            shutil.copyfileobj(src, dest, 1024*8)
        return

    # Directory entry: there is no data, only a local header.
    zinfo.compress_size = 0
    zinfo.CRC = 0
    with self._lock:
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        self.start_dir = self.fp.tell()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001625
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When a name is given, a ZipInfo is created with the current local
    time and the archive's default compression; names ending in '/' get
    directory attributes.  'compress_type', when not None, overrides the
    compression of the entry.  Raises ValueError if the archive is closed
    or a write handle is already open.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        # endswith() rather than indexing [-1], so that an empty archive
        # name does not fail with an unrelated IndexError.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    zinfo.file_size = len(data)            # Uncompressed size
    with self._lock:
        with self.open(zinfo, mode='w') as dest:
            dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001661
1662 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001663 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001664 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001665
def close(self):
    """Close the file, and for mode 'w', 'x' and 'a' write the ending
    records.

    Calling close() on an already closed archive does nothing.  Raises
    ValueError while a write handle is still open.
    """
    if self.fp is None:
        return

    if self._writing:
        raise ValueError("Can't close the ZIP file while there is "
                         "an open writing handle on it. "
                         "Close the writing handle before closing the zip.")

    try:
        # The ending records are only written for a modified, writable archive.
        must_finalize = self.mode in ('w', 'x', 'a') and self._didModify
        if must_finalize:
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        # Drop our reference even if writing the ending records failed.
        handle, self.fp = self.fp, None
        self._fpclose(handle)
1687
def _write_end_record(self):
    """Write the central directory and the end-of-archive records.

    One central directory entry is written per member in self.filelist,
    followed by the ZIP64 end records when a limit is exceeded, the
    regular end-of-archive record and the archive comment.  Raises
    LargeZipFile when ZIP64 records are required but allowZip64 is false.
    The caller is expected to have positioned self.fp where the central
    directory should start (close() seeks to self.start_dir first).
    """
    for zinfo in self.filelist:         # write central directory
        # Pack the timestamp into the 16-bit DOS date and time fields
        # (years count from 1980, seconds are stored in 2-second units).
        dt = zinfo.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        # Values that overflow their 32-bit header field are collected in
        # 'extra' and replaced by 0xffffffff in the fixed header.
        extra = []
        if zinfo.file_size > ZIP64_LIMIT \
           or zinfo.compress_size > ZIP64_LIMIT:
            extra.append(zinfo.file_size)
            extra.append(zinfo.compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
        else:
            file_size = zinfo.file_size
            compress_size = zinfo.compress_size

        # Header offsets are recorded relative to self._start_disk.
        header_offset = zinfo.header_offset - self._start_disk
        if header_offset > ZIP64_LIMIT:
            extra.append(header_offset)
            header_offset = 0xffffffff

        extra_data = zinfo.extra
        min_version = 0
        if extra:
            # Append a ZIP64 field to the extra's
            extra_data = struct.pack(
                '<HH' + 'Q'*len(extra),
                1, 8*len(extra), *extra) + extra_data

            min_version = ZIP64_VERSION

        # Some compression methods raise the minimum version needed.
        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
        except DeprecationWarning:
            # NOTE(review): presumably a debugging aid for when warnings
            # are turned into errors; it dumps the values being packed to
            # stderr and re-raises.
            print((structCentralDir, stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset), file=sys.stderr)
            raise
        # Fixed-size entry first, then its variable-length parts.
        self.fp.write(centdir)
        self.fp.write(filename)
        self.fp.write(extra_data)
        self.fp.write(zinfo.comment)

    pos2 = self.fp.tell()
    # Write end-of-zip-archive record
    centDirCount = len(self.filelist)
    centDirSize = pos2 - self.start_dir
    centDirOffset = self.start_dir - self._start_disk
    requires_zip64 = None
    if centDirCount > ZIP_FILECOUNT_LIMIT:
        requires_zip64 = "Files count"
    elif centDirOffset > ZIP64_LIMIT:
        requires_zip64 = "Central directory offset"
    elif centDirSize > ZIP64_LIMIT:
        requires_zip64 = "Central directory size"
    if requires_zip64:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
        zip64endrec = struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, centDirCount, centDirCount,
            centDirSize, centDirOffset)
        self.fp.write(zip64endrec)

        # The locator records pos2, the position at which the ZIP64 end
        # record was just written.
        zip64locrec = struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, pos2, 1)
        self.fp.write(zip64locrec)
        # Clamp the values written to the regular end record to the
        # maximum its 16/32-bit fields can hold.
        centDirCount = min(centDirCount, 0xFFFF)
        centDirSize = min(centDirSize, 0xFFFFFFFF)
        centDirOffset = min(centDirOffset, 0xFFFFFFFF)

    endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, centDirCount, centDirCount,
                         centDirSize, centDirOffset, len(self._comment))
    self.fp.write(endrec)
    self.fp.write(self._comment)
    self.fp.flush()
1787
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001788 def _fpclose(self, fp):
1789 assert self._fileRefCnt > 0
1790 self._fileRefCnt -= 1
1791 if not self._fileRefCnt and not self._filePassed:
1792 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001793
1794
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember the optimize level.

        optimize=-1 selects the legacy behaviour of _get_codename (use
        whatever compiled file is present); 0, 1 or 2 select the
        bytecode optimization level to use.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.
        If filterfunc(pathname) is given, it is called with every argument.
        When it is False, the file or directory is skipped.
        """
        pathname = os.fspath(pathname)
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s %r skipped by filterfunc' % (label, pathname))
            return

        # Case 1: a single module file.
        if not os.path.isdir(pathname):
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")

        # Case 2: a plain directory -- add its modules at top level.
        if not os.path.isfile(initname):
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if ext != ".py":
                    continue
                if filterfunc and not filterfunc(path):
                    if self.debug:
                        print('file %r skipped by filterfunc' % path)
                    continue
                fname, arcname = self._get_codename(path[0:-3],
                                                    basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
            return

        # Case 3: a package directory -- add it under its own name.
        name = os.path.split(pathname)[1]
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)
        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in entries:
            path = os.path.join(pathname, filename)
            ext = os.path.splitext(filename)[1]
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename,
                                 filterfunc=filterfunc)  # Recursive call
                continue
            if ext != ".py":
                continue
            if filterfunc and not filterfunc(path):
                if self.debug:
                    print('file %r skipped by filterfunc' % path)
                continue
            fname, arcname = self._get_codename(path[0:-3],
                                                basename)
            if self.debug:
                print("Adding", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).
        """
        def _compile(file, optimize=-1):
            # Byte-compile 'file'; report failure instead of raising.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)

        def _up_to_date(candidate):
            # True when 'candidate' exists and is at least as new as the source.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present
            # The legacy .pyc next to the source is preferred, then the
            # __pycache__ files; all are stored under the legacy pyc name.
            for candidate in (file_pyc, pycache_opt0,
                              pycache_opt1, pycache_opt2):
                if _up_to_date(candidate):
                    fname = candidate
                    arcname = file_pyc
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    if sys.flags.optimize == 0:
                        fname = pycache_opt0
                    elif sys.flags.optimize == 1:
                        fname = pycache_opt1
                    else:
                        fname = pycache_opt2
                    arcname = file_pyc
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_opt0
            elif self._optimize == 1:
                fname = pycache_opt1
            elif self._optimize == 2:
                fname = pycache_opt2
            else:
                msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                raise ValueError(msg)
            arcname = file_pyc
            if not _up_to_date(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001967
1968
def main(args=None):
    """Run the simple command-line interface of this module.

    *args* is the list of command-line arguments to parse; it is passed
    straight to ``argparse.ArgumentParser.parse_args``, so ``None`` lets
    argparse fall back to its own default argument source.

    Exactly one of the following options is required:

    -l/--list <zipfile>
        Print a listing of the archive (``ZipFile.printdir``).
    -e/--extract <zipfile> <output_dir>
        Extract every member into <output_dir> (``ZipFile.extractall``).
    -c/--create <name> [<file> ...]
        Create archive <name> from the given files and directories.
        Regular files are stored with ZIP_DEFLATED; directories are added
        recursively, with their entries visited in sorted order so that
        the member order of the resulting archive is reproducible.
    -t/--test <zipfile>
        Run ``ZipFile.testzip`` and report the member name it returns,
        if any, then print "Done testing".
    """
    import argparse

    description = 'A simple command-line interface for zipfile module.'
    parser = argparse.ArgumentParser(description=description)
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    group.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    group.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    args = parser.parse_args(args)

    if args.test is not None:
        src = args.test
        with ZipFile(src, 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args.list is not None:
        src = args.list
        with ZipFile(src, 'r') as zf:
            zf.printdir()

    elif args.extract is not None:
        src, curdir = args.extract
        with ZipFile(src, 'r') as zf:
            zf.extractall(curdir)

    elif args.create is not None:
        # First positional value is the archive to create, the rest are
        # the sources to put into it.
        zip_name = args.create.pop(0)
        files = args.create

        def addToZip(zf, path, zippath):
            """Add *path* to *zf* under the archive name *zippath*."""
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                # An empty zippath means "archive root": no entry is
                # written for the directory itself, only for its content.
                if zippath:
                    zf.write(path, zippath)
                # os.listdir() returns entries in arbitrary order; sort
                # them so the archive layout does not depend on the
                # file system.
                for nm in sorted(os.listdir(path)):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(zip_name, 'w') as zf:
            for path in files:
                zippath = os.path.basename(path)
                if not zippath:
                    # Path ends with a separator ("dir/"): use the name
                    # of the directory it points at.
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                addToZip(zf, path, zippath)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()