blob: 0eed4ce9a63441e43bd36e8a05990eeb4bf9f980 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04006import binascii
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04007import importlib.util
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import io
shireenraoa4e29912019-08-24 11:26:41 -04009import itertools
Barry Warsaw28a691b2010-04-17 00:19:56 +000010import os
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040011import posixpath
Barry Warsaw28a691b2010-04-17 00:19:56 +000012import shutil
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040013import stat
Barry Warsaw28a691b2010-04-17 00:19:56 +000014import struct
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040015import sys
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020016import threading
Jason R. Coombsb2758ff2019-05-08 09:45:06 -040017import time
Jason R. Coombse5bd7362020-02-11 21:58:47 -050018import contextlib
Jason R. Coombsd1a0a962020-10-25 14:45:05 -040019import pathlib
Guido van Rossum32abe6f2000-03-31 17:30:02 +000020
21try:
Tim Peterse1190062001-01-15 03:34:38 +000022 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000023 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040024except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000025 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000026 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000027
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020028try:
29 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040030except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020031 bz2 = None
32
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020033try:
34 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040035except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020036 lzma = None
37
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020038__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020039 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Zackery Spytz9a81ab12020-03-23 07:29:36 -060040 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
41 "Path"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000042
Georg Brandl4d540882010-10-28 06:42:33 +000043class BadZipFile(Exception):
Guido van Rossum32abe6f2000-03-31 17:30:02 +000044 pass
Thomas Wouters0e3f5912006-08-11 14:57:12 +000045
46
47class LargeZipFile(Exception):
48 """
49 Raised when writing a zipfile, the zipfile requires ZIP64 extensions
50 and those extensions are disabled.
51 """
52
Georg Brandl4d540882010-10-28 06:42:33 +000053error = BadZipfile = BadZipFile # Pre-3.2 compatibility names
54
Guido van Rossum32abe6f2000-03-31 17:30:02 +000055
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +000056ZIP64_LIMIT = (1 << 31) - 1
Serhiy Storchakacfbb3942014-09-23 21:34:24 +030057ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +000058ZIP_MAX_COMMENT = (1 << 16) - 1
Thomas Wouters0e3f5912006-08-11 14:57:12 +000059
Guido van Rossum32abe6f2000-03-31 17:30:02 +000060# constants for Zip file compression methods
61ZIP_STORED = 0
62ZIP_DEFLATED = 8
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020063ZIP_BZIP2 = 12
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020064ZIP_LZMA = 14
Guido van Rossum32abe6f2000-03-31 17:30:02 +000065# Other ZIP compression methods not supported
66
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020067DEFAULT_VERSION = 20
68ZIP64_VERSION = 45
69BZIP2_VERSION = 46
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020070LZMA_VERSION = 63
Martin v. Löwisd099b562012-05-01 14:08:22 +020071# we recognize (but not necessarily support) all features up to that version
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020072MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020073
Martin v. Löwisb09b8442008-07-03 14:13:42 +000074# Below are some formats and associated data for reading/writing headers using
75# the struct module. The names and structures of headers/records are those used
76# in the PKWARE description of the ZIP file format:
77# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
78# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000079
Martin v. Löwisb09b8442008-07-03 14:13:42 +000080# The "end of central directory" structure, magic number, size, and indices
81# (section V.I in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +000082structEndArchive = b"<4s4H2LH"
83stringEndArchive = b"PK\005\006"
84sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +000085
86_ECD_SIGNATURE = 0
87_ECD_DISK_NUMBER = 1
88_ECD_DISK_START = 2
89_ECD_ENTRIES_THIS_DISK = 3
90_ECD_ENTRIES_TOTAL = 4
91_ECD_SIZE = 5
92_ECD_OFFSET = 6
93_ECD_COMMENT_SIZE = 7
94# These last two indices are not part of the structure as defined in the
95# spec, but they are used internally by this module as a convenience
96_ECD_COMMENT = 8
97_ECD_LOCATION = 9
98
99# The "central directory" structure, magic number, size, and indices
100# of entries in the structure (section V.F in the format document)
101structCentralDir = "<4s4B4HL2L5H2L"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000102stringCentralDir = b"PK\001\002"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000103sizeCentralDir = struct.calcsize(structCentralDir)
104
Fred Drake3e038e52001-02-28 17:56:26 +0000105# indexes of entries in the central directory structure
106_CD_SIGNATURE = 0
107_CD_CREATE_VERSION = 1
108_CD_CREATE_SYSTEM = 2
109_CD_EXTRACT_VERSION = 3
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000110_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +0000111_CD_FLAG_BITS = 5
112_CD_COMPRESS_TYPE = 6
113_CD_TIME = 7
114_CD_DATE = 8
115_CD_CRC = 9
116_CD_COMPRESSED_SIZE = 10
117_CD_UNCOMPRESSED_SIZE = 11
118_CD_FILENAME_LENGTH = 12
119_CD_EXTRA_FIELD_LENGTH = 13
120_CD_COMMENT_LENGTH = 14
121_CD_DISK_NUMBER_START = 15
122_CD_INTERNAL_FILE_ATTRIBUTES = 16
123_CD_EXTERNAL_FILE_ATTRIBUTES = 17
124_CD_LOCAL_HEADER_OFFSET = 18
125
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000126# The "local file header" structure, magic number, size, and indices
127# (section V.A in the format document)
128structFileHeader = "<4s2B4HL2L2H"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000129stringFileHeader = b"PK\003\004"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000130sizeFileHeader = struct.calcsize(structFileHeader)
131
Fred Drake3e038e52001-02-28 17:56:26 +0000132_FH_SIGNATURE = 0
133_FH_EXTRACT_VERSION = 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000134_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000135_FH_GENERAL_PURPOSE_FLAG_BITS = 3
136_FH_COMPRESSION_METHOD = 4
137_FH_LAST_MOD_TIME = 5
138_FH_LAST_MOD_DATE = 6
139_FH_CRC = 7
140_FH_COMPRESSED_SIZE = 8
141_FH_UNCOMPRESSED_SIZE = 9
142_FH_FILENAME_LENGTH = 10
143_FH_EXTRA_FIELD_LENGTH = 11
144
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000145# The "Zip64 end of central directory locator" structure, magic number, and size
Georg Brandl2ee470f2008-07-16 12:55:28 +0000146structEndArchive64Locator = "<4sLQL"
147stringEndArchive64Locator = b"PK\x06\x07"
148sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000149
150# The "Zip64 end of central directory" record, magic number, size, and indices
151# (section V.G in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000152structEndArchive64 = "<4sQ2H2L4Q"
153stringEndArchive64 = b"PK\x06\x06"
154sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000155
156_CD64_SIGNATURE = 0
157_CD64_DIRECTORY_RECSIZE = 1
158_CD64_CREATE_VERSION = 2
159_CD64_EXTRACT_VERSION = 3
160_CD64_DISK_NUMBER = 4
161_CD64_DISK_NUMBER_START = 5
162_CD64_NUMBER_ENTRIES_THIS_DISK = 6
163_CD64_NUMBER_ENTRIES_TOTAL = 7
164_CD64_DIRECTORY_SIZE = 8
165_CD64_OFFSET_START_CENTDIR = 9
166
Silas Sewell4ba3b502018-09-18 13:00:05 -0400167_DD_SIGNATURE = 0x08074b50
168
Serhiy Storchaka9bdb7be2018-09-17 15:36:40 +0300169_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
170
171def _strip_extra(extra, xids):
172 # Remove Extra Fields with specified IDs.
173 unpack = _EXTRA_FIELD_STRUCT.unpack
174 modified = False
175 buffer = []
176 start = i = 0
177 while i + 4 <= len(extra):
178 xid, xlen = unpack(extra[i : i + 4])
179 j = i + 4 + xlen
180 if xid in xids:
181 if i != start:
182 buffer.append(extra[start : i])
183 start = j
184 modified = True
185 i = j
186 if not modified:
187 return extra
188 return b''.join(buffer)
189
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000190def _check_zipfile(fp):
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000191 try:
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000192 if _EndRecData(fp):
193 return True # file has correct magic number
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200194 except OSError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000195 pass
Guido van Rossum8ca162f2002-04-07 06:36:23 +0000196 return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000197
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000198def is_zipfile(filename):
199 """Quickly see if a file is a ZIP file by checking the magic number.
200
201 The filename argument may be a file or file-like object too.
202 """
203 result = False
204 try:
205 if hasattr(filename, "read"):
206 result = _check_zipfile(fp=filename)
207 else:
208 with open(filename, "rb") as fp:
209 result = _check_zipfile(fp)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200210 except OSError:
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000211 pass
212 return result
213
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000214def _EndRecData64(fpin, offset, endrec):
215 """
216 Read the ZIP64 end-of-archive records and use that to update endrec
217 """
Georg Brandl268e4d42010-10-14 06:59:45 +0000218 try:
219 fpin.seek(offset - sizeEndCentDir64Locator, 2)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200220 except OSError:
Georg Brandl268e4d42010-10-14 06:59:45 +0000221 # If the seek fails, the file is not large enough to contain a ZIP64
222 # end-of-archive record, so just return the end record we were given.
223 return endrec
224
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000225 data = fpin.read(sizeEndCentDir64Locator)
Serhiy Storchakad2b15272013-01-31 15:27:07 +0200226 if len(data) != sizeEndCentDir64Locator:
227 return endrec
Georg Brandl2ee470f2008-07-16 12:55:28 +0000228 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
229 if sig != stringEndArchive64Locator:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000230 return endrec
231
Francisco Facioniab0716e2019-05-29 00:15:11 +0100232 if diskno != 0 or disks > 1:
Éric Araujoae2d8322010-10-28 13:49:17 +0000233 raise BadZipFile("zipfiles that span multiple disks are not supported")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000234
235 # Assume no 'zip64 extensible data'
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000236 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
237 data = fpin.read(sizeEndCentDir64)
Serhiy Storchakad2b15272013-01-31 15:27:07 +0200238 if len(data) != sizeEndCentDir64:
239 return endrec
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000240 sig, sz, create_version, read_version, disk_num, disk_dir, \
Christian Tismer59202e52013-10-21 03:59:23 +0200241 dircount, dircount2, dirsize, diroffset = \
242 struct.unpack(structEndArchive64, data)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000243 if sig != stringEndArchive64:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000244 return endrec
245
246 # Update the original endrec using data from the ZIP64 record
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +0000247 endrec[_ECD_SIGNATURE] = sig
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000248 endrec[_ECD_DISK_NUMBER] = disk_num
249 endrec[_ECD_DISK_START] = disk_dir
250 endrec[_ECD_ENTRIES_THIS_DISK] = dircount
251 endrec[_ECD_ENTRIES_TOTAL] = dircount2
252 endrec[_ECD_SIZE] = dirsize
253 endrec[_ECD_OFFSET] = diroffset
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000254 return endrec
255
256
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000257def _EndRecData(fpin):
258 """Return data from the "End of Central Directory" record, or None.
259
260 The data is a list of the nine items in the ZIP "End of central dir"
261 record followed by a tenth item, the file seek offset of this record."""
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000262
263 # Determine file size
264 fpin.seek(0, 2)
265 filesize = fpin.tell()
266
267 # Check to see if this is ZIP file with no archive comment (the
268 # "end of central directory" structure should be the last item in the
269 # file if this is the case).
Amaury Forgeot d'Arcbc347802009-07-28 22:18:57 +0000270 try:
271 fpin.seek(-sizeEndCentDir, 2)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200272 except OSError:
Amaury Forgeot d'Arcbc347802009-07-28 22:18:57 +0000273 return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000274 data = fpin.read()
Serhiy Storchakad2b15272013-01-31 15:27:07 +0200275 if (len(data) == sizeEndCentDir and
276 data[0:4] == stringEndArchive and
277 data[-2:] == b"\000\000"):
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000278 # the signature is correct and there's no comment, unpack structure
Georg Brandl2ee470f2008-07-16 12:55:28 +0000279 endrec = struct.unpack(structEndArchive, data)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000280 endrec=list(endrec)
281
282 # Append a blank comment and record start offset
283 endrec.append(b"")
284 endrec.append(filesize - sizeEndCentDir)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000285
Amaury Forgeot d'Arcd3fb4bb2009-01-18 00:29:02 +0000286 # Try to read the "Zip64 end of central directory" structure
287 return _EndRecData64(fpin, -sizeEndCentDir, endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000288
289 # Either this is not a ZIP file, or it is a ZIP file with an archive
290 # comment. Search the end of the file for the "end of central directory"
291 # record signature. The comment is the last item in the ZIP file and may be
292 # up to 64K long. It is assumed that the "end of central directory" magic
293 # number does not appear in the comment.
294 maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
295 fpin.seek(maxCommentStart, 0)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000296 data = fpin.read()
Georg Brandl2ee470f2008-07-16 12:55:28 +0000297 start = data.rfind(stringEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000298 if start >= 0:
299 # found the magic number; attempt to unpack and interpret
300 recData = data[start:start+sizeEndCentDir]
Serhiy Storchakad2b15272013-01-31 15:27:07 +0200301 if len(recData) != sizeEndCentDir:
302 # Zip file is corrupted.
303 return None
Georg Brandl2ee470f2008-07-16 12:55:28 +0000304 endrec = list(struct.unpack(structEndArchive, recData))
R David Murray4fbb9db2011-06-09 15:50:51 -0400305 commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
306 comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
307 endrec.append(comment)
308 endrec.append(maxCommentStart + start)
Amaury Forgeot d'Arcd3fb4bb2009-01-18 00:29:02 +0000309
R David Murray4fbb9db2011-06-09 15:50:51 -0400310 # Try to read the "Zip64 end of central directory" structure
311 return _EndRecData64(fpin, maxCommentStart + start - filesize,
312 endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000313
314 # Unable to find a valid end of central directory structure
Serhiy Storchakad2b15272013-01-31 15:27:07 +0200315 return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000316
Fred Drake484d7352000-10-02 21:14:52 +0000317
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000318class ZipInfo (object):
Fred Drake484d7352000-10-02 21:14:52 +0000319 """Class with attributes describing each file in the ZIP archive."""
320
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000321 __slots__ = (
Christian Tismer59202e52013-10-21 03:59:23 +0200322 'orig_filename',
323 'filename',
324 'date_time',
325 'compress_type',
Bo Baylesce237c72018-01-29 23:54:07 -0600326 '_compresslevel',
Christian Tismer59202e52013-10-21 03:59:23 +0200327 'comment',
328 'extra',
329 'create_system',
330 'create_version',
331 'extract_version',
332 'reserved',
333 'flag_bits',
334 'volume',
335 'internal_attr',
336 'external_attr',
337 'header_offset',
338 'CRC',
339 'compress_size',
340 'file_size',
341 '_raw_time',
342 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000343
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000344 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
Greg Ward8e36d282003-06-18 00:53:06 +0000345 self.orig_filename = filename # Original file name in archive
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000346
347 # Terminate the file name at the first null byte. Null bytes in file
348 # names are used as tricks by viruses in archives.
Greg Ward8e36d282003-06-18 00:53:06 +0000349 null_byte = filename.find(chr(0))
350 if null_byte >= 0:
351 filename = filename[0:null_byte]
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000352 # This is used to ensure paths in generated ZIP files always use
353 # forward slashes as the directory separator, as required by the
354 # ZIP format specification.
355 if os.sep != "/" and os.sep in filename:
Greg Ward8e36d282003-06-18 00:53:06 +0000356 filename = filename.replace(os.sep, "/")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000357
Greg Ward8e36d282003-06-18 00:53:06 +0000358 self.filename = filename # Normalized file name
Tim Peterse1190062001-01-15 03:34:38 +0000359 self.date_time = date_time # year, month, day, hour, min, sec
Senthil Kumaran29fa9d42011-10-20 01:46:00 +0800360
361 if date_time[0] < 1980:
362 raise ValueError('ZIP does not support timestamps before 1980')
363
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000364 # Standard values:
Tim Peterse1190062001-01-15 03:34:38 +0000365 self.compress_type = ZIP_STORED # Type of compression for the file
Bo Baylesce237c72018-01-29 23:54:07 -0600366 self._compresslevel = None # Level for the compressor
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000367 self.comment = b"" # Comment for each file
368 self.extra = b"" # ZIP extra data
Martin v. Löwis00756902006-02-05 17:09:41 +0000369 if sys.platform == 'win32':
370 self.create_system = 0 # System which created ZIP archive
371 else:
372 # Assume everything else is unix-y
373 self.create_system = 3 # System which created ZIP archive
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200374 self.create_version = DEFAULT_VERSION # Version which created ZIP archive
375 self.extract_version = DEFAULT_VERSION # Version needed to extract archive
Tim Peterse1190062001-01-15 03:34:38 +0000376 self.reserved = 0 # Must be zero
377 self.flag_bits = 0 # ZIP flag bits
378 self.volume = 0 # Volume number of file header
379 self.internal_attr = 0 # Internal attributes
380 self.external_attr = 0 # External file attributes
Mickaël Schoentgen992347d2019-09-09 15:08:54 +0200381 self.compress_size = 0 # Size of the compressed file
382 self.file_size = 0 # Size of the uncompressed file
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000383 # Other attributes are set by class ZipFile:
Tim Peterse1190062001-01-15 03:34:38 +0000384 # header_offset Byte offset to the file header
Tim Peterse1190062001-01-15 03:34:38 +0000385 # CRC CRC-32 of the uncompressed file
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000386
Serhiy Storchaka51a43702014-10-29 22:42:06 +0200387 def __repr__(self):
388 result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
389 if self.compress_type != ZIP_STORED:
390 result.append(' compress_type=%s' %
391 compressor_names.get(self.compress_type,
392 self.compress_type))
393 hi = self.external_attr >> 16
394 lo = self.external_attr & 0xFFFF
395 if hi:
396 result.append(' filemode=%r' % stat.filemode(hi))
397 if lo:
398 result.append(' external_attr=%#x' % lo)
Serhiy Storchaka503f9082016-02-08 00:02:25 +0200399 isdir = self.is_dir()
Serhiy Storchaka51a43702014-10-29 22:42:06 +0200400 if not isdir or self.file_size:
401 result.append(' file_size=%r' % self.file_size)
402 if ((not isdir or self.compress_size) and
403 (self.compress_type != ZIP_STORED or
404 self.file_size != self.compress_size)):
405 result.append(' compress_size=%r' % self.compress_size)
406 result.append('>')
407 return ''.join(result)
408
Serhiy Storchaka182d7cd2013-01-15 00:31:39 +0200409 def FileHeader(self, zip64=None):
Serhiy Storchaka4bb186d2018-11-25 09:51:14 +0200410 """Return the per-file header as a bytes object."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000411 dt = self.date_time
412 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +0000413 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000414 if self.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +0000415 # Set these to zero because we write them after the file data
416 CRC = compress_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000417 else:
Tim Peterse1190062001-01-15 03:34:38 +0000418 CRC = self.CRC
419 compress_size = self.compress_size
420 file_size = self.file_size
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000421
422 extra = self.extra
423
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200424 min_version = 0
Serhiy Storchaka182d7cd2013-01-15 00:31:39 +0200425 if zip64 is None:
426 zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
427 if zip64:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000428 fmt = '<HHQQ'
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000429 extra = extra + struct.pack(fmt,
Christian Tismer59202e52013-10-21 03:59:23 +0200430 1, struct.calcsize(fmt)-4, file_size, compress_size)
Serhiy Storchaka182d7cd2013-01-15 00:31:39 +0200431 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
432 if not zip64:
433 raise LargeZipFile("Filesize would require ZIP64 extensions")
434 # File is larger than what fits into a 4 byte integer,
435 # fall back to the ZIP64 extension
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000436 file_size = 0xffffffff
437 compress_size = 0xffffffff
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200438 min_version = ZIP64_VERSION
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000439
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200440 if self.compress_type == ZIP_BZIP2:
441 min_version = max(BZIP2_VERSION, min_version)
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200442 elif self.compress_type == ZIP_LZMA:
443 min_version = max(LZMA_VERSION, min_version)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200444
445 self.extract_version = max(min_version, self.extract_version)
446 self.create_version = max(min_version, self.create_version)
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000447 filename, flag_bits = self._encodeFilenameFlags()
Georg Brandl2ee470f2008-07-16 12:55:28 +0000448 header = struct.pack(structFileHeader, stringFileHeader,
Christian Tismer59202e52013-10-21 03:59:23 +0200449 self.extract_version, self.reserved, flag_bits,
450 self.compress_type, dostime, dosdate, CRC,
451 compress_size, file_size,
452 len(filename), len(extra))
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000453 return header + filename + extra
454
455 def _encodeFilenameFlags(self):
456 try:
457 return self.filename.encode('ascii'), self.flag_bits
458 except UnicodeEncodeError:
459 return self.filename.encode('utf-8'), self.flag_bits | 0x800
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000460
461 def _decodeExtra(self):
462 # Try to decode the extra field.
463 extra = self.extra
464 unpack = struct.unpack
Gregory P. Smith0af8a862014-05-29 23:42:14 -0700465 while len(extra) >= 4:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000466 tp, ln = unpack('<HH', extra[:4])
Serhiy Storchakafeccdb22017-03-09 18:34:03 +0200467 if ln+4 > len(extra):
468 raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
469 if tp == 0x0001:
Serhiy Storchakae27449d2019-11-09 13:13:36 +0200470 data = extra[4:ln+4]
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000471 # ZIP64 extension (large files and/or large archives)
Serhiy Storchakae27449d2019-11-09 13:13:36 +0200472 try:
473 if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF):
474 field = "File size"
475 self.file_size, = unpack('<Q', data[:8])
476 data = data[8:]
477 if self.compress_size == 0xFFFF_FFFF:
478 field = "Compress size"
479 self.compress_size, = unpack('<Q', data[:8])
480 data = data[8:]
481 if self.header_offset == 0xFFFF_FFFF:
482 field = "Header offset"
483 self.header_offset, = unpack('<Q', data[:8])
484 except struct.error:
485 raise BadZipFile(f"Corrupt zip64 extra field. "
486 f"{field} not found.") from None
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000487
488 extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000489
Serhiy Storchaka503f9082016-02-08 00:02:25 +0200490 @classmethod
Marcel Plcha2fe1e52018-08-02 15:04:52 +0200491 def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
Serhiy Storchaka503f9082016-02-08 00:02:25 +0200492 """Construct an appropriate ZipInfo for a file on the filesystem.
493
494 filename should be the path to a file or directory on the filesystem.
495
496 arcname is the name which it will have within the archive (by default,
497 this will be the same as filename, but without a drive letter and with
498 leading path separators removed).
499 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +0200500 if isinstance(filename, os.PathLike):
501 filename = os.fspath(filename)
Serhiy Storchaka503f9082016-02-08 00:02:25 +0200502 st = os.stat(filename)
503 isdir = stat.S_ISDIR(st.st_mode)
504 mtime = time.localtime(st.st_mtime)
505 date_time = mtime[0:6]
Marcel Plcha2fe1e52018-08-02 15:04:52 +0200506 if not strict_timestamps and date_time[0] < 1980:
507 date_time = (1980, 1, 1, 0, 0, 0)
508 elif not strict_timestamps and date_time[0] > 2107:
509 date_time = (2107, 12, 31, 23, 59, 59)
Serhiy Storchaka503f9082016-02-08 00:02:25 +0200510 # Create ZipInfo instance to store file information
511 if arcname is None:
512 arcname = filename
513 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
514 while arcname[0] in (os.sep, os.altsep):
515 arcname = arcname[1:]
516 if isdir:
517 arcname += '/'
518 zinfo = cls(arcname, date_time)
519 zinfo.external_attr = (st.st_mode & 0xFFFF) << 16 # Unix attributes
520 if isdir:
521 zinfo.file_size = 0
522 zinfo.external_attr |= 0x10 # MS-DOS directory flag
523 else:
524 zinfo.file_size = st.st_size
525
526 return zinfo
527
528 def is_dir(self):
Serhiy Storchakaf47fc552016-05-15 12:27:16 +0300529 """Return True if this archive member is a directory."""
Serhiy Storchaka503f9082016-02-08 00:02:25 +0200530 return self.filename[-1] == '/'
531
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000532
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300533# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
534# internal keys. We noticed that a direct implementation is faster than
535# relying on binascii.crc32().
Thomas Wouterscf297e42007-02-23 15:07:44 +0000536
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300537_crctable = None
538def _gen_crc(crc):
539 for j in range(8):
540 if crc & 1:
541 crc = (crc >> 1) ^ 0xEDB88320
542 else:
543 crc >>= 1
544 return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000545
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300546# ZIP supports a password-based form of encryption. Even though known
547# plaintext attacks have been found against it, it is still useful
548# to be able to get data out of such a file.
549#
550# Usage:
551# zd = _ZipDecrypter(mypwd)
552# plain_bytes = zd(cypher_bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000553
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300554def _ZipDecrypter(pwd):
555 key0 = 305419896
556 key1 = 591751049
557 key2 = 878082192
Thomas Wouterscf297e42007-02-23 15:07:44 +0000558
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300559 global _crctable
560 if _crctable is None:
561 _crctable = list(map(_gen_crc, range(256)))
562 crctable = _crctable
Thomas Wouterscf297e42007-02-23 15:07:44 +0000563
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300564 def crc32(ch, crc):
Thomas Wouterscf297e42007-02-23 15:07:44 +0000565 """Compute the CRC32 primitive on one byte."""
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300566 return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000567
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300568 def update_keys(c):
569 nonlocal key0, key1, key2
570 key0 = crc32(c, key0)
571 key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
572 key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
573 key2 = crc32(key1 >> 24, key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000574
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300575 for p in pwd:
576 update_keys(p)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000577
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300578 def decrypter(data):
579 """Decrypt a bytes object."""
580 result = bytearray()
581 append = result.append
582 for c in data:
583 k = key2 | 2
584 c ^= ((k * (k^1)) >> 8) & 0xFF
585 update_keys(c)
586 append(c)
587 return bytes(result)
588
589 return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000590
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200591
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200592class LZMACompressor:
593
594 def __init__(self):
595 self._comp = None
596
597 def _init(self):
Nadeem Vawdaa425c3d2012-06-21 23:36:48 +0200598 props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200599 self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
Christian Tismer59202e52013-10-21 03:59:23 +0200600 lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200601 ])
602 return struct.pack('<BBH', 9, 4, len(props)) + props
603
604 def compress(self, data):
605 if self._comp is None:
606 return self._init() + self._comp.compress(data)
607 return self._comp.compress(data)
608
609 def flush(self):
610 if self._comp is None:
611 return self._init() + self._comp.flush()
612 return self._comp.flush()
613
614
615class LZMADecompressor:
616
617 def __init__(self):
618 self._decomp = None
619 self._unconsumed = b''
620 self.eof = False
621
622 def decompress(self, data):
623 if self._decomp is None:
624 self._unconsumed += data
625 if len(self._unconsumed) <= 4:
626 return b''
627 psize, = struct.unpack('<H', self._unconsumed[2:4])
628 if len(self._unconsumed) <= 4 + psize:
629 return b''
630
631 self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
Christian Tismer59202e52013-10-21 03:59:23 +0200632 lzma._decode_filter_properties(lzma.FILTER_LZMA1,
633 self._unconsumed[4:4 + psize])
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200634 ])
635 data = self._unconsumed[4 + psize:]
636 del self._unconsumed
637
638 result = self._decomp.decompress(data)
639 self.eof = self._decomp.eof
640 return result
641
642
643compressor_names = {
644 0: 'store',
645 1: 'shrink',
646 2: 'reduce',
647 3: 'reduce',
648 4: 'reduce',
649 5: 'reduce',
650 6: 'implode',
651 7: 'tokenize',
652 8: 'deflate',
653 9: 'deflate64',
654 10: 'implode',
655 12: 'bzip2',
656 14: 'lzma',
657 18: 'terse',
658 19: 'lz77',
659 97: 'wavpack',
660 98: 'ppmd',
661}
662
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200663def _check_compression(compression):
664 if compression == ZIP_STORED:
665 pass
666 elif compression == ZIP_DEFLATED:
667 if not zlib:
668 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +0200669 "Compression requires the (missing) zlib module")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200670 elif compression == ZIP_BZIP2:
671 if not bz2:
672 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +0200673 "Compression requires the (missing) bz2 module")
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200674 elif compression == ZIP_LZMA:
675 if not lzma:
676 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +0200677 "Compression requires the (missing) lzma module")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200678 else:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300679 raise NotImplementedError("That compression method is not supported")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200680
681
Bo Baylesce237c72018-01-29 23:54:07 -0600682def _get_compressor(compress_type, compresslevel=None):
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200683 if compress_type == ZIP_DEFLATED:
Bo Baylesce237c72018-01-29 23:54:07 -0600684 if compresslevel is not None:
685 return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
686 return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200687 elif compress_type == ZIP_BZIP2:
Bo Baylesce237c72018-01-29 23:54:07 -0600688 if compresslevel is not None:
689 return bz2.BZ2Compressor(compresslevel)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200690 return bz2.BZ2Compressor()
Bo Baylesce237c72018-01-29 23:54:07 -0600691 # compresslevel is ignored for ZIP_LZMA
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200692 elif compress_type == ZIP_LZMA:
693 return LZMACompressor()
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200694 else:
695 return None
696
697
698def _get_decompressor(compress_type):
Berker Peksag2f1b8572019-09-12 17:13:44 +0300699 _check_compression(compress_type)
Martin v. Löwisb3260f02012-05-01 08:38:01 +0200700 if compress_type == ZIP_STORED:
701 return None
702 elif compress_type == ZIP_DEFLATED:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200703 return zlib.decompressobj(-15)
704 elif compress_type == ZIP_BZIP2:
705 return bz2.BZ2Decompressor()
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200706 elif compress_type == ZIP_LZMA:
707 return LZMADecompressor()
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200708 else:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200709 descr = compressor_names.get(compress_type)
Martin v. Löwisb3260f02012-05-01 08:38:01 +0200710 if descr:
711 raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
712 else:
713 raise NotImplementedError("compression type %d" % (compress_type,))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200714
715
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200716class _SharedFile:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300717 def __init__(self, file, pos, close, lock, writing):
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200718 self._file = file
719 self._pos = pos
720 self._close = close
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200721 self._lock = lock
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300722 self._writing = writing
John Jolly066df4f2018-01-30 01:51:35 -0700723 self.seekable = file.seekable
724 self.tell = file.tell
725
726 def seek(self, offset, whence=0):
727 with self._lock:
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200728 if self._writing():
John Jolly066df4f2018-01-30 01:51:35 -0700729 raise ValueError("Can't reposition in the ZIP file while "
730 "there is an open writing handle on it. "
731 "Close the writing handle before trying to read.")
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200732 self._file.seek(offset, whence)
John Jolly066df4f2018-01-30 01:51:35 -0700733 self._pos = self._file.tell()
734 return self._pos
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200735
736 def read(self, n=-1):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200737 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300738 if self._writing():
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300739 raise ValueError("Can't read from the ZIP file while there "
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300740 "is an open writing handle on it. "
741 "Close the writing handle before trying to read.")
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200742 self._file.seek(self._pos)
743 data = self._file.read(n)
744 self._pos = self._file.tell()
745 return data
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200746
747 def close(self):
748 if self._file is not None:
749 fileobj = self._file
750 self._file = None
751 self._close(fileobj)
752
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200753# Provide the tell method for unseekable stream
754class _Tellable:
755 def __init__(self, fp):
756 self.fp = fp
757 self.offset = 0
758
759 def write(self, data):
760 n = self.fp.write(data)
761 self.offset += n
762 return n
763
764 def tell(self):
765 return self.offset
766
767 def flush(self):
768 self.fp.flush()
769
770 def close(self):
771 self.fp.close()
772
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200773
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000774class ZipExtFile(io.BufferedIOBase):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000775 """File-like object for reading an archive member.
776 Is returned by ZipFile.open().
777 """
778
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000779 # Max size supported by decompressor.
780 MAX_N = 1 << 31 - 1
Guido van Rossumd8faa362007-04-27 19:54:29 +0000781
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000782 # Read from compressed files in 4k blocks.
783 MIN_READ_SIZE = 4096
Guido van Rossumd8faa362007-04-27 19:54:29 +0000784
John Jolly066df4f2018-01-30 01:51:35 -0700785 # Chunk size to read during seek
786 MAX_SEEK_READ = 1 << 24
787
Serhiy Storchaka5c32af72019-10-27 10:22:14 +0200788 def __init__(self, fileobj, mode, zipinfo, pwd=None,
Łukasz Langae94980a2010-11-22 23:31:26 +0000789 close_fileobj=False):
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000790 self._fileobj = fileobj
Serhiy Storchaka5c32af72019-10-27 10:22:14 +0200791 self._pwd = pwd
Łukasz Langae94980a2010-11-22 23:31:26 +0000792 self._close_fileobj = close_fileobj
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000793
Ezio Melotti92b47432010-01-28 01:44:41 +0000794 self._compress_type = zipinfo.compress_type
Ezio Melotti92b47432010-01-28 01:44:41 +0000795 self._compress_left = zipinfo.compress_size
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200796 self._left = zipinfo.file_size
Ezio Melotti92b47432010-01-28 01:44:41 +0000797
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200798 self._decompressor = _get_decompressor(self._compress_type)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000799
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200800 self._eof = False
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000801 self._readbuffer = b''
802 self._offset = 0
803
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000804 self.newlines = None
805
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000806 self.mode = mode
Guido van Rossumd8faa362007-04-27 19:54:29 +0000807 self.name = zipinfo.filename
808
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000809 if hasattr(zipinfo, 'CRC'):
810 self._expected_crc = zipinfo.CRC
Martin Panterb82032f2015-12-11 05:19:29 +0000811 self._running_crc = crc32(b'')
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000812 else:
813 self._expected_crc = None
814
John Jolly066df4f2018-01-30 01:51:35 -0700815 self._seekable = False
816 try:
817 if fileobj.seekable():
818 self._orig_compress_start = fileobj.tell()
819 self._orig_compress_size = zipinfo.compress_size
820 self._orig_file_size = zipinfo.file_size
821 self._orig_start_crc = self._running_crc
822 self._seekable = True
823 except AttributeError:
824 pass
825
Serhiy Storchaka5c32af72019-10-27 10:22:14 +0200826 self._decrypter = None
827 if pwd:
828 if zipinfo.flag_bits & 0x8:
829 # compare against the file type from extended local headers
830 check_byte = (zipinfo._raw_time >> 8) & 0xff
831 else:
832 # compare against the CRC otherwise
833 check_byte = (zipinfo.CRC >> 24) & 0xff
834 h = self._init_decrypter()
835 if h != check_byte:
836 raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)
837
838
839 def _init_decrypter(self):
840 self._decrypter = _ZipDecrypter(self._pwd)
841 # The first 12 bytes in the cypher stream is an encryption header
842 # used to strengthen the algorithm. The first 11 bytes are
843 # completely random, while the 12th contains the MSB of the CRC,
844 # or the MSB of the file time depending on the header type
845 # and is used to check the correctness of the password.
846 header = self._fileobj.read(12)
847 self._compress_left -= 12
848 return self._decrypter(header)[11]
849
Serhiy Storchaka51a43702014-10-29 22:42:06 +0200850 def __repr__(self):
851 result = ['<%s.%s' % (self.__class__.__module__,
852 self.__class__.__qualname__)]
853 if not self.closed:
854 result.append(' name=%r mode=%r' % (self.name, self.mode))
855 if self._compress_type != ZIP_STORED:
856 result.append(' compress_type=%s' %
857 compressor_names.get(self._compress_type,
858 self._compress_type))
859 else:
860 result.append(' [closed]')
861 result.append('>')
862 return ''.join(result)
863
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000864 def readline(self, limit=-1):
865 """Read and return a line from the stream.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000866
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000867 If limit is specified, at most limit bytes will be read.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000868 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000869
Serhiy Storchakae670be22016-06-11 19:32:44 +0300870 if limit < 0:
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000871 # Shortcut common case - newline found in buffer.
872 i = self._readbuffer.find(b'\n', self._offset) + 1
873 if i > 0:
874 line = self._readbuffer[self._offset: i]
875 self._offset = i
876 return line
Guido van Rossumd8faa362007-04-27 19:54:29 +0000877
Serhiy Storchakae670be22016-06-11 19:32:44 +0300878 return io.BufferedIOBase.readline(self, limit)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000879
880 def peek(self, n=1):
881 """Returns buffered bytes without advancing the position."""
882 if n > len(self._readbuffer) - self._offset:
883 chunk = self.read(n)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200884 if len(chunk) > self._offset:
885 self._readbuffer = chunk + self._readbuffer[self._offset:]
886 self._offset = 0
887 else:
888 self._offset -= len(chunk)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000889
890 # Return up to 512 bytes to reduce allocation overhead for tight loops.
891 return self._readbuffer[self._offset: self._offset + 512]
892
893 def readable(self):
Daniel Hillier8d62df62019-11-30 19:30:47 +1100894 if self.closed:
895 raise ValueError("I/O operation on closed file.")
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000896 return True
897
898 def read(self, n=-1):
899 """Read and return up to n bytes.
nick sung53c29352019-03-15 03:26:25 +0800900 If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000901 """
Daniel Hillier8d62df62019-11-30 19:30:47 +1100902 if self.closed:
903 raise ValueError("read from closed file.")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200904 if n is None or n < 0:
905 buf = self._readbuffer[self._offset:]
906 self._readbuffer = b''
907 self._offset = 0
908 while not self._eof:
909 buf += self._read1(self.MAX_N)
910 return buf
Guido van Rossumd8faa362007-04-27 19:54:29 +0000911
Antoine Pitrou78157b32012-06-23 16:44:48 +0200912 end = n + self._offset
913 if end < len(self._readbuffer):
914 buf = self._readbuffer[self._offset:end]
915 self._offset = end
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200916 return buf
917
Antoine Pitrou78157b32012-06-23 16:44:48 +0200918 n = end - len(self._readbuffer)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200919 buf = self._readbuffer[self._offset:]
920 self._readbuffer = b''
921 self._offset = 0
922 while n > 0 and not self._eof:
923 data = self._read1(n)
924 if n < len(data):
925 self._readbuffer = data
926 self._offset = n
927 buf += data[:n]
928 break
929 buf += data
930 n -= len(data)
931 return buf
932
933 def _update_crc(self, newdata):
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000934 # Update the CRC using the given data.
935 if self._expected_crc is None:
936 # No need to compute the CRC if we don't have a reference value
937 return
Martin Panterb82032f2015-12-11 05:19:29 +0000938 self._running_crc = crc32(newdata, self._running_crc)
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000939 # Check the CRC if we're at the end of the file
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200940 if self._eof and self._running_crc != self._expected_crc:
Georg Brandl4d540882010-10-28 06:42:33 +0000941 raise BadZipFile("Bad CRC-32 for file %r" % self.name)
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000942
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000943 def read1(self, n):
944 """Read up to n bytes with at most one read() system call."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000945
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200946 if n is None or n < 0:
947 buf = self._readbuffer[self._offset:]
948 self._readbuffer = b''
949 self._offset = 0
Serhiy Storchakad2c07a52013-09-27 22:11:57 +0300950 while not self._eof:
951 data = self._read1(self.MAX_N)
952 if data:
953 buf += data
954 break
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200955 return buf
Guido van Rossumd8faa362007-04-27 19:54:29 +0000956
Antoine Pitrou78157b32012-06-23 16:44:48 +0200957 end = n + self._offset
958 if end < len(self._readbuffer):
959 buf = self._readbuffer[self._offset:end]
960 self._offset = end
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200961 return buf
962
Antoine Pitrou78157b32012-06-23 16:44:48 +0200963 n = end - len(self._readbuffer)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200964 buf = self._readbuffer[self._offset:]
965 self._readbuffer = b''
966 self._offset = 0
967 if n > 0:
Serhiy Storchakad2c07a52013-09-27 22:11:57 +0300968 while not self._eof:
969 data = self._read1(n)
970 if n < len(data):
971 self._readbuffer = data
972 self._offset = n
973 buf += data[:n]
974 break
975 if data:
976 buf += data
977 break
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200978 return buf
979
980 def _read1(self, n):
981 # Read up to n compressed bytes with at most one read() system call,
982 # decrypt and decompress them.
983 if self._eof or n <= 0:
984 return b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000985
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000986 # Read from file.
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200987 if self._compress_type == ZIP_DEFLATED:
988 ## Handle unconsumed data.
989 data = self._decompressor.unconsumed_tail
990 if n > len(data):
991 data += self._read2(n - len(data))
992 else:
993 data = self._read2(n)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000994
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200995 if self._compress_type == ZIP_STORED:
996 self._eof = self._compress_left <= 0
997 elif self._compress_type == ZIP_DEFLATED:
998 n = max(n, self.MIN_READ_SIZE)
999 data = self._decompressor.decompress(data, n)
1000 self._eof = (self._decompressor.eof or
Christian Tismer59202e52013-10-21 03:59:23 +02001001 self._compress_left <= 0 and
1002 not self._decompressor.unconsumed_tail)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001003 if self._eof:
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001004 data += self._decompressor.flush()
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001005 else:
1006 data = self._decompressor.decompress(data)
1007 self._eof = self._decompressor.eof or self._compress_left <= 0
Guido van Rossumd8faa362007-04-27 19:54:29 +00001008
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001009 data = data[:self._left]
1010 self._left -= len(data)
1011 if self._left <= 0:
1012 self._eof = True
1013 self._update_crc(data)
1014 return data
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001015
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001016 def _read2(self, n):
1017 if self._compress_left <= 0:
1018 return b''
1019
1020 n = max(n, self.MIN_READ_SIZE)
1021 n = min(n, self._compress_left)
1022
1023 data = self._fileobj.read(n)
1024 self._compress_left -= len(data)
Serhiy Storchaka5ce3f102014-01-09 14:50:20 +02001025 if not data:
1026 raise EOFError
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001027
1028 if self._decrypter is not None:
Serhiy Storchaka06e52252017-03-30 19:09:08 +03001029 data = self._decrypter(data)
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001030 return data
Guido van Rossumd8faa362007-04-27 19:54:29 +00001031
Łukasz Langae94980a2010-11-22 23:31:26 +00001032 def close(self):
1033 try:
1034 if self._close_fileobj:
1035 self._fileobj.close()
1036 finally:
1037 super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001038
John Jolly066df4f2018-01-30 01:51:35 -07001039 def seekable(self):
Daniel Hillier8d62df62019-11-30 19:30:47 +11001040 if self.closed:
1041 raise ValueError("I/O operation on closed file.")
John Jolly066df4f2018-01-30 01:51:35 -07001042 return self._seekable
1043
1044 def seek(self, offset, whence=0):
Daniel Hillier8d62df62019-11-30 19:30:47 +11001045 if self.closed:
1046 raise ValueError("seek on closed file.")
John Jolly066df4f2018-01-30 01:51:35 -07001047 if not self._seekable:
1048 raise io.UnsupportedOperation("underlying stream is not seekable")
1049 curr_pos = self.tell()
1050 if whence == 0: # Seek from start of file
1051 new_pos = offset
1052 elif whence == 1: # Seek from current position
1053 new_pos = curr_pos + offset
1054 elif whence == 2: # Seek from EOF
1055 new_pos = self._orig_file_size + offset
1056 else:
1057 raise ValueError("whence must be os.SEEK_SET (0), "
1058 "os.SEEK_CUR (1), or os.SEEK_END (2)")
1059
1060 if new_pos > self._orig_file_size:
1061 new_pos = self._orig_file_size
1062
1063 if new_pos < 0:
1064 new_pos = 0
1065
1066 read_offset = new_pos - curr_pos
1067 buff_offset = read_offset + self._offset
1068
1069 if buff_offset >= 0 and buff_offset < len(self._readbuffer):
1070 # Just move the _offset index if the new position is in the _readbuffer
1071 self._offset = buff_offset
1072 read_offset = 0
1073 elif read_offset < 0:
1074 # Position is before the current position. Reset the ZipExtFile
John Jolly066df4f2018-01-30 01:51:35 -07001075 self._fileobj.seek(self._orig_compress_start)
1076 self._running_crc = self._orig_start_crc
1077 self._compress_left = self._orig_compress_size
1078 self._left = self._orig_file_size
1079 self._readbuffer = b''
1080 self._offset = 0
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +02001081 self._decompressor = _get_decompressor(self._compress_type)
John Jolly066df4f2018-01-30 01:51:35 -07001082 self._eof = False
1083 read_offset = new_pos
Serhiy Storchaka5c32af72019-10-27 10:22:14 +02001084 if self._decrypter is not None:
1085 self._init_decrypter()
John Jolly066df4f2018-01-30 01:51:35 -07001086
1087 while read_offset > 0:
1088 read_len = min(self.MAX_SEEK_READ, read_offset)
1089 self.read(read_len)
1090 read_offset -= read_len
1091
1092 return self.tell()
1093
1094 def tell(self):
Daniel Hillier8d62df62019-11-30 19:30:47 +11001095 if self.closed:
1096 raise ValueError("tell on closed file.")
John Jolly066df4f2018-01-30 01:51:35 -07001097 if not self._seekable:
1098 raise io.UnsupportedOperation("underlying stream is not seekable")
1099 filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
1100 return filepos
1101
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001102
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001103class _ZipWriteFile(io.BufferedIOBase):
1104 def __init__(self, zf, zinfo, zip64):
1105 self._zinfo = zinfo
1106 self._zip64 = zip64
1107 self._zipfile = zf
Bo Baylesce237c72018-01-29 23:54:07 -06001108 self._compressor = _get_compressor(zinfo.compress_type,
1109 zinfo._compresslevel)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001110 self._file_size = 0
1111 self._compress_size = 0
1112 self._crc = 0
1113
1114 @property
1115 def _fileobj(self):
1116 return self._zipfile.fp
1117
1118 def writable(self):
1119 return True
1120
1121 def write(self, data):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001122 if self.closed:
1123 raise ValueError('I/O operation on closed file.')
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001124 nbytes = len(data)
1125 self._file_size += nbytes
1126 self._crc = crc32(data, self._crc)
1127 if self._compressor:
1128 data = self._compressor.compress(data)
1129 self._compress_size += len(data)
1130 self._fileobj.write(data)
1131 return nbytes
1132
1133 def close(self):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001134 if self.closed:
1135 return
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001136 try:
1137 super().close()
1138 # Flush any data from the compressor, and update header info
1139 if self._compressor:
1140 buf = self._compressor.flush()
1141 self._compress_size += len(buf)
1142 self._fileobj.write(buf)
1143 self._zinfo.compress_size = self._compress_size
1144 else:
1145 self._zinfo.compress_size = self._file_size
1146 self._zinfo.CRC = self._crc
1147 self._zinfo.file_size = self._file_size
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001148
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001149 # Write updated header info
1150 if self._zinfo.flag_bits & 0x08:
1151 # Write CRC and file sizes after the file data
1152 fmt = '<LLQQ' if self._zip64 else '<LLLL'
1153 self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
1154 self._zinfo.compress_size, self._zinfo.file_size))
1155 self._zipfile.start_dir = self._fileobj.tell()
1156 else:
1157 if not self._zip64:
1158 if self._file_size > ZIP64_LIMIT:
1159 raise RuntimeError(
1160 'File size unexpectedly exceeded ZIP64 limit')
1161 if self._compress_size > ZIP64_LIMIT:
1162 raise RuntimeError(
1163 'Compressed size unexpectedly exceeded ZIP64 limit')
1164 # Seek backwards and write file header (which will now include
1165 # correct CRC and file sizes)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001166
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001167 # Preserve current position in file
1168 self._zipfile.start_dir = self._fileobj.tell()
1169 self._fileobj.seek(self._zinfo.header_offset)
1170 self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1171 self._fileobj.seek(self._zipfile.start_dir)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001172
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001173 # Successfully written: Add file to our caches
1174 self._zipfile.filelist.append(self._zinfo)
1175 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1176 finally:
1177 self._zipfile._writing = False
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001178
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001179
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001180
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001181class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001182 """ Class with methods to open, read, write, close, list zip files.
1183
Bo Baylesce237c72018-01-29 23:54:07 -06001184 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1185 compresslevel=None)
Tim Petersa19a1682001-03-29 04:36:09 +00001186
Fred Drake3d9091e2001-03-26 15:49:24 +00001187 file: Either the path to the file, or a file-like object.
1188 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001189 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1190 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001191 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1192 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001193 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1194 needed, otherwise it will raise an exception when this would
1195 be necessary.
Bo Baylesce237c72018-01-29 23:54:07 -06001196 compresslevel: None (default for the given compression type) or an integer
1197 specifying the level to pass to the compressor.
1198 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1199 When using ZIP_DEFLATED integers 0 through 9 are accepted.
1200 When using ZIP_BZIP2 integers 1 through 9 are accepted.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001201
Fred Drake3d9091e2001-03-26 15:49:24 +00001202 """
Fred Drake484d7352000-10-02 21:14:52 +00001203
Fred Drake90eac282001-02-28 05:29:34 +00001204 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001205 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001206
Bo Baylesce237c72018-01-29 23:54:07 -06001207 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
Marcel Plch77b112c2018-08-31 16:43:31 +02001208 compresslevel=None, *, strict_timestamps=True):
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001209 """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
1210 or append 'a'."""
1211 if mode not in ('r', 'w', 'x', 'a'):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001212 raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001213
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001214 _check_compression(compression)
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001215
1216 self._allowZip64 = allowZip64
1217 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +00001218 self.debug = 0 # Level of printing: 0 through 3
1219 self.NameToInfo = {} # Find file info given name
1220 self.filelist = [] # List of ZipInfo instances for archive
1221 self.compression = compression # Method of compression
Bo Baylesce237c72018-01-29 23:54:07 -06001222 self.compresslevel = compresslevel
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001223 self.mode = mode
Thomas Wouterscf297e42007-02-23 15:07:44 +00001224 self.pwd = None
R David Murrayf50b38a2012-04-12 18:44:58 -04001225 self._comment = b''
Marcel Plch77b112c2018-08-31 16:43:31 +02001226 self._strict_timestamps = strict_timestamps
Tim Petersa19a1682001-03-29 04:36:09 +00001227
Fred Drake3d9091e2001-03-26 15:49:24 +00001228 # Check if we were passed a file-like object
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001229 if isinstance(file, os.PathLike):
1230 file = os.fspath(file)
Guido van Rossum3172c5d2007-10-16 18:12:55 +00001231 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001232 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +00001233 self._filePassed = 0
1234 self.filename = file
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001235 modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
1236 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001237 filemode = modeDict[mode]
1238 while True:
1239 try:
1240 self.fp = io.open(file, filemode)
1241 except OSError:
1242 if filemode in modeDict:
1243 filemode = modeDict[filemode]
1244 continue
Thomas Wouterscf297e42007-02-23 15:07:44 +00001245 raise
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001246 break
Fred Drake3d9091e2001-03-26 15:49:24 +00001247 else:
1248 self._filePassed = 1
1249 self.fp = file
1250 self.filename = getattr(file, 'name', None)
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001251 self._fileRefCnt = 1
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001252 self._lock = threading.RLock()
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001253 self._seekable = True
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001254 self._writing = False
Tim Petersa19a1682001-03-29 04:36:09 +00001255
Antoine Pitrou17babc52012-11-17 23:50:08 +01001256 try:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001257 if mode == 'r':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001258 self._RealGetContents()
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001259 elif mode in ('w', 'x'):
Georg Brandl268e4d42010-10-14 06:59:45 +00001260 # set the modified flag so central directory gets written
1261 # even if no files are added to the archive
1262 self._didModify = True
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001263 try:
Serhiy Storchaka34cba332017-01-01 19:00:30 +02001264 self.start_dir = self.fp.tell()
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001265 except (AttributeError, OSError):
1266 self.fp = _Tellable(self.fp)
Serhiy Storchaka34cba332017-01-01 19:00:30 +02001267 self.start_dir = 0
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001268 self._seekable = False
1269 else:
1270 # Some file-like objects can provide tell() but not seek()
1271 try:
1272 self.fp.seek(self.start_dir)
1273 except (AttributeError, OSError):
1274 self._seekable = False
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001275 elif mode == 'a':
Antoine Pitrou17babc52012-11-17 23:50:08 +01001276 try:
1277 # See if file is a zip file
1278 self._RealGetContents()
1279 # seek to start of directory and overwrite
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001280 self.fp.seek(self.start_dir)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001281 except BadZipFile:
1282 # file is not a zip file, just append
1283 self.fp.seek(0, 2)
1284
1285 # set the modified flag so central directory gets written
1286 # even if no files are added to the archive
1287 self._didModify = True
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001288 self.start_dir = self.fp.tell()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001289 else:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001290 raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
Antoine Pitrou17babc52012-11-17 23:50:08 +01001291 except:
1292 fp = self.fp
1293 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001294 self._fpclose(fp)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001295 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001296
Ezio Melottifaa6b7f2009-12-30 12:34:59 +00001297 def __enter__(self):
1298 return self
1299
1300 def __exit__(self, type, value, traceback):
1301 self.close()
1302
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001303 def __repr__(self):
1304 result = ['<%s.%s' % (self.__class__.__module__,
1305 self.__class__.__qualname__)]
1306 if self.fp is not None:
1307 if self._filePassed:
1308 result.append(' file=%r' % self.fp)
1309 elif self.filename is not None:
1310 result.append(' filename=%r' % self.filename)
1311 result.append(' mode=%r' % self.mode)
1312 else:
1313 result.append(' [closed]')
1314 result.append('>')
1315 return ''.join(result)
1316
Tim Peters7d3bad62001-04-04 18:56:49 +00001317 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +00001318 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001319 fp = self.fp
Georg Brandl268e4d42010-10-14 06:59:45 +00001320 try:
1321 endrec = _EndRecData(fp)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001322 except OSError:
Georg Brandl4d540882010-10-28 06:42:33 +00001323 raise BadZipFile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001324 if not endrec:
Georg Brandl4d540882010-10-28 06:42:33 +00001325 raise BadZipFile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001326 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001327 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001328 size_cd = endrec[_ECD_SIZE] # bytes in central directory
1329 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
R David Murrayf50b38a2012-04-12 18:44:58 -04001330 self._comment = endrec[_ECD_COMMENT] # archive comment
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001331
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001332 # "concat" is zero, unless zip was concatenated to another file
1333 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +00001334 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1335 # If Zip64 extension structures are present, account for them
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001336 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001337
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001338 if self.debug > 2:
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001339 inferred = concat + offset_cd
1340 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001341 # self.start_dir: Position of start of central directory
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001342 self.start_dir = offset_cd + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001343 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001344 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001345 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001346 total = 0
1347 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001348 centdir = fp.read(sizeCentralDir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001349 if len(centdir) != sizeCentralDir:
1350 raise BadZipFile("Truncated central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001351 centdir = struct.unpack(structCentralDir, centdir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001352 if centdir[_CD_SIGNATURE] != stringCentralDir:
1353 raise BadZipFile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001354 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001355 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +00001356 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001357 flags = centdir[5]
1358 if flags & 0x800:
1359 # UTF-8 file names extension
1360 filename = filename.decode('utf-8')
1361 else:
1362 # Historical ZIP filename encoding
1363 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001364 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001365 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +00001366 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1367 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001368 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001369 (x.create_version, x.create_system, x.extract_version, x.reserved,
Christian Tismer59202e52013-10-21 03:59:23 +02001370 x.flag_bits, x.compress_type, t, d,
1371 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
Martin v. Löwisd099b562012-05-01 14:08:22 +02001372 if x.extract_version > MAX_EXTRACT_VERSION:
1373 raise NotImplementedError("zip file version %.1f" %
1374 (x.extract_version / 10))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001375 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1376 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +00001377 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001378 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Christian Tismer59202e52013-10-21 03:59:23 +02001379 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001380
1381 x._decodeExtra()
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001382 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001383 self.filelist.append(x)
1384 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001385
1386 # update total bytes read from central directory
1387 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1388 + centdir[_CD_EXTRA_FIELD_LENGTH]
1389 + centdir[_CD_COMMENT_LENGTH])
1390
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001391 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001392 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001393
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001394
1395 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001396 """Return a list of file names in the archive."""
Ezio Melotti006917e2012-04-16 21:34:24 -06001397 return [data.filename for data in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001398
1399 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001400 """Return a list of class ZipInfo instances for files in the
1401 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001402 return self.filelist
1403
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001404 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +00001405 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001406 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
1407 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001408 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001409 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001410 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1411 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001412
1413 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001414 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001415 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001416 for zinfo in self.filelist:
1417 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001418 # Read by chunks, to avoid an OverflowError or a
1419 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001420 with self.open(zinfo.filename, "r") as f:
1421 while f.read(chunk_size): # Check CRC-32
1422 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001423 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001424 return zinfo.filename
1425
1426 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +00001427 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001428 info = self.NameToInfo.get(name)
1429 if info is None:
1430 raise KeyError(
1431 'There is no item named %r in the archive' % name)
1432
1433 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001434
Thomas Wouterscf297e42007-02-23 15:07:44 +00001435 def setpassword(self, pwd):
1436 """Set default password for encrypted files."""
R. David Murray8d855d82010-12-21 21:53:37 +00001437 if pwd and not isinstance(pwd, bytes):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001438 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
R. David Murray8d855d82010-12-21 21:53:37 +00001439 if pwd:
1440 self.pwd = pwd
1441 else:
1442 self.pwd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001443
R David Murrayf50b38a2012-04-12 18:44:58 -04001444 @property
1445 def comment(self):
1446 """The comment text associated with the ZIP file."""
1447 return self._comment
1448
1449 @comment.setter
1450 def comment(self, comment):
1451 if not isinstance(comment, bytes):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001452 raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
R David Murrayf50b38a2012-04-12 18:44:58 -04001453 # check for valid comment length
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001454 if len(comment) > ZIP_MAX_COMMENT:
1455 import warnings
1456 warnings.warn('Archive comment is too long; truncating to %d bytes'
1457 % ZIP_MAX_COMMENT, stacklevel=2)
R David Murrayf50b38a2012-04-12 18:44:58 -04001458 comment = comment[:ZIP_MAX_COMMENT]
1459 self._comment = comment
1460 self._didModify = True
1461
Thomas Wouterscf297e42007-02-23 15:07:44 +00001462 def read(self, name, pwd=None):
Serhiy Storchaka4bb186d2018-11-25 09:51:14 +02001463 """Return file bytes for name."""
Benjamin Petersond285bdb2010-10-31 17:57:22 +00001464 with self.open(name, "r", pwd) as fp:
1465 return fp.read()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001466
Serhiy Storchakaf47fc552016-05-15 12:27:16 +03001467 def open(self, name, mode="r", pwd=None, *, force_zip64=False):
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001468 """Return file-like object for 'name'.
1469
1470 name is a string for the file name within the ZIP file, or a ZipInfo
1471 object.
1472
1473 mode should be 'r' to read a file already in the ZIP file, or 'w' to
1474 write to a file newly added to the archive.
1475
1476 pwd is the password to decrypt files (only used for reading).
1477
1478 When writing, if the file size is not known in advance but may exceed
1479 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
1480 files. If the size is known in advance, it is best to pass a ZipInfo
1481 instance for name, with zinfo.file_size set.
1482 """
Serhiy Storchakae670be22016-06-11 19:32:44 +03001483 if mode not in {"r", "w"}:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001484 raise ValueError('open() requires mode "r" or "w"')
R. David Murray8d855d82010-12-21 21:53:37 +00001485 if pwd and not isinstance(pwd, bytes):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001486 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001487 if pwd and (mode == "w"):
1488 raise ValueError("pwd is only supported for reading files")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001489 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001490 raise ValueError(
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001491 "Attempt to use ZIP archive that was already closed")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001492
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001493 # Make sure we have an info object
1494 if isinstance(name, ZipInfo):
1495 # 'name' is already an info object
1496 zinfo = name
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001497 elif mode == 'w':
1498 zinfo = ZipInfo(name)
1499 zinfo.compress_type = self.compression
Bo Baylesce237c72018-01-29 23:54:07 -06001500 zinfo._compresslevel = self.compresslevel
Guido van Rossumd8faa362007-04-27 19:54:29 +00001501 else:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001502 # Get info object for name
1503 zinfo = self.getinfo(name)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001504
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001505 if mode == 'w':
1506 return self._open_to_write(zinfo, force_zip64=force_zip64)
1507
1508 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001509 raise ValueError("Can't read from the ZIP file while there "
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001510 "is an open writing handle on it. "
1511 "Close the writing handle before trying to read.")
1512
1513 # Open for reading:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001514 self._fileRefCnt += 1
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001515 zef_file = _SharedFile(self.fp, zinfo.header_offset,
1516 self._fpclose, self._lock, lambda: self._writing)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001517 try:
Antoine Pitrou17babc52012-11-17 23:50:08 +01001518 # Skip the file header:
1519 fheader = zef_file.read(sizeFileHeader)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001520 if len(fheader) != sizeFileHeader:
1521 raise BadZipFile("Truncated file header")
1522 fheader = struct.unpack(structFileHeader, fheader)
1523 if fheader[_FH_SIGNATURE] != stringFileHeader:
Antoine Pitrou17babc52012-11-17 23:50:08 +01001524 raise BadZipFile("Bad magic number for file header")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001525
Antoine Pitrou17babc52012-11-17 23:50:08 +01001526 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
1527 if fheader[_FH_EXTRA_FIELD_LENGTH]:
1528 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001529
Antoine Pitrou8572da52012-11-17 23:52:05 +01001530 if zinfo.flag_bits & 0x20:
1531 # Zip 2.7: compressed patched data
1532 raise NotImplementedError("compressed patched data (flag bit 5)")
Martin v. Löwis2a2ce322012-05-01 08:44:08 +02001533
Antoine Pitrou8572da52012-11-17 23:52:05 +01001534 if zinfo.flag_bits & 0x40:
1535 # strong encryption
1536 raise NotImplementedError("strong encryption (flag bit 6)")
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001537
Serhiy Storchaka36ff5132020-06-22 11:24:11 +03001538 if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800:
Antoine Pitrou17babc52012-11-17 23:50:08 +01001539 # UTF-8 filename
1540 fname_str = fname.decode("utf-8")
1541 else:
1542 fname_str = fname.decode("cp437")
Georg Brandl5ba11de2011-01-01 10:09:32 +00001543
Antoine Pitrou17babc52012-11-17 23:50:08 +01001544 if fname_str != zinfo.orig_filename:
1545 raise BadZipFile(
1546 'File name in directory %r and header %r differ.'
1547 % (zinfo.orig_filename, fname))
1548
1549 # check for encrypted flag & handle password
1550 is_encrypted = zinfo.flag_bits & 0x1
Antoine Pitrou17babc52012-11-17 23:50:08 +01001551 if is_encrypted:
1552 if not pwd:
1553 pwd = self.pwd
1554 if not pwd:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001555 raise RuntimeError("File %r is encrypted, password "
Antoine Pitrou17babc52012-11-17 23:50:08 +01001556 "required for extraction" % name)
Serhiy Storchaka5c32af72019-10-27 10:22:14 +02001557 else:
1558 pwd = None
Antoine Pitrou17babc52012-11-17 23:50:08 +01001559
Serhiy Storchaka5c32af72019-10-27 10:22:14 +02001560 return ZipExtFile(zef_file, mode, zinfo, pwd, True)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001561 except:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001562 zef_file.close()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001563 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001564
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001565 def _open_to_write(self, zinfo, force_zip64=False):
1566 if force_zip64 and not self._allowZip64:
1567 raise ValueError(
1568 "force_zip64 is True, but allowZip64 was False when opening "
1569 "the ZIP file."
1570 )
1571 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001572 raise ValueError("Can't write to the ZIP file while there is "
1573 "another write handle open on it. "
1574 "Close the first handle before opening another.")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001575
Mickaël Schoentgen992347d2019-09-09 15:08:54 +02001576 # Size and CRC are overwritten with correct data after processing the file
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001577 zinfo.compress_size = 0
1578 zinfo.CRC = 0
1579
1580 zinfo.flag_bits = 0x00
1581 if zinfo.compress_type == ZIP_LZMA:
1582 # Compressed data includes an end-of-stream (EOS) marker
1583 zinfo.flag_bits |= 0x02
1584 if not self._seekable:
1585 zinfo.flag_bits |= 0x08
1586
1587 if not zinfo.external_attr:
1588 zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
1589
1590 # Compressed size can be larger than uncompressed size
1591 zip64 = self._allowZip64 and \
1592 (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1593
1594 if self._seekable:
1595 self.fp.seek(self.start_dir)
1596 zinfo.header_offset = self.fp.tell()
1597
1598 self._writecheck(zinfo)
1599 self._didModify = True
1600
1601 self.fp.write(zinfo.FileHeader(zip64))
1602
1603 self._writing = True
1604 return _ZipWriteFile(self, zinfo, zip64)
1605
Christian Heimes790c8232008-01-07 21:14:23 +00001606 def extract(self, member, path=None, pwd=None):
1607 """Extract a member from the archive to the current working directory,
1608 using its full name. Its file information is extracted as accurately
1609 as possible. `member' may be a filename or a ZipInfo object. You can
1610 specify a different directory using `path'.
1611 """
Christian Heimes790c8232008-01-07 21:14:23 +00001612 if path is None:
1613 path = os.getcwd()
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001614 else:
1615 path = os.fspath(path)
Christian Heimes790c8232008-01-07 21:14:23 +00001616
1617 return self._extract_member(member, path, pwd)
1618
1619 def extractall(self, path=None, members=None, pwd=None):
1620 """Extract all members from the archive to the current working
1621 directory. `path' specifies a different directory to extract to.
1622 `members' is optional and must be a subset of the list returned
1623 by namelist().
1624 """
1625 if members is None:
1626 members = self.namelist()
1627
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001628 if path is None:
1629 path = os.getcwd()
1630 else:
1631 path = os.fspath(path)
1632
Christian Heimes790c8232008-01-07 21:14:23 +00001633 for zipinfo in members:
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001634 self._extract_member(zipinfo, path, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001635
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001636 @classmethod
1637 def _sanitize_windows_name(cls, arcname, pathsep):
1638 """Replace bad characters and remove trailing dots from parts."""
1639 table = cls._windows_illegal_name_trans_table
1640 if not table:
1641 illegal = ':<>|"?*'
1642 table = str.maketrans(illegal, '_' * len(illegal))
1643 cls._windows_illegal_name_trans_table = table
1644 arcname = arcname.translate(table)
1645 # remove trailing dots
1646 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1647 # rejoin, removing empty parts.
1648 arcname = pathsep.join(x for x in arcname if x)
1649 return arcname
1650
Christian Heimes790c8232008-01-07 21:14:23 +00001651 def _extract_member(self, member, targetpath, pwd):
1652 """Extract the ZipInfo object 'member' to a physical
1653 file on the path targetpath.
1654 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001655 if not isinstance(member, ZipInfo):
1656 member = self.getinfo(member)
1657
Christian Heimes790c8232008-01-07 21:14:23 +00001658 # build the destination pathname, replacing
1659 # forward slashes to platform specific separators.
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001660 arcname = member.filename.replace('/', os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001661
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001662 if os.path.altsep:
1663 arcname = arcname.replace(os.path.altsep, os.path.sep)
1664 # interpret absolute pathname as relative, remove drive letter or
1665 # UNC path, redundant separators, "." and ".." components.
1666 arcname = os.path.splitdrive(arcname)[1]
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001667 invalid_path_parts = ('', os.path.curdir, os.path.pardir)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001668 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001669 if x not in invalid_path_parts)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001670 if os.path.sep == '\\':
Serhiy Storchakae5e64442013-02-02 19:50:59 +02001671 # filter illegal characters on Windows
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001672 arcname = self._sanitize_windows_name(arcname, os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001673
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001674 targetpath = os.path.join(targetpath, arcname)
Christian Heimes790c8232008-01-07 21:14:23 +00001675 targetpath = os.path.normpath(targetpath)
1676
1677 # Create all upper directories if necessary.
1678 upperdirs = os.path.dirname(targetpath)
1679 if upperdirs and not os.path.exists(upperdirs):
1680 os.makedirs(upperdirs)
1681
Serhiy Storchaka503f9082016-02-08 00:02:25 +02001682 if member.is_dir():
Martin v. Löwis70ccd162009-05-24 19:47:22 +00001683 if not os.path.isdir(targetpath):
1684 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001685 return targetpath
1686
Antoine Pitrou17babc52012-11-17 23:50:08 +01001687 with self.open(member, pwd=pwd) as source, \
1688 open(targetpath, "wb") as target:
1689 shutil.copyfileobj(source, target)
Christian Heimes790c8232008-01-07 21:14:23 +00001690
1691 return targetpath
1692
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001693 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001694 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001695 if zinfo.filename in self.NameToInfo:
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001696 import warnings
1697 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001698 if self.mode not in ('w', 'x', 'a'):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001699 raise ValueError("write() requires mode 'w', 'x', or 'a'")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001700 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001701 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001702 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001703 _check_compression(zinfo.compress_type)
Serhiy Storchakacfbb3942014-09-23 21:34:24 +03001704 if not self._allowZip64:
1705 requires_zip64 = None
1706 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1707 requires_zip64 = "Files count"
1708 elif zinfo.file_size > ZIP64_LIMIT:
1709 requires_zip64 = "Filesize"
1710 elif zinfo.header_offset > ZIP64_LIMIT:
1711 requires_zip64 = "Zipfile size"
1712 if requires_zip64:
1713 raise LargeZipFile(requires_zip64 +
1714 " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001715
Bo Baylesce237c72018-01-29 23:54:07 -06001716 def write(self, filename, arcname=None,
Marcel Plch77b112c2018-08-31 16:43:31 +02001717 compress_type=None, compresslevel=None):
Fred Drake484d7352000-10-02 21:14:52 +00001718 """Put the bytes from filename into the archive under the name
1719 arcname."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001720 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001721 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001722 "Attempt to write to ZIP archive that was already closed")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001723 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001724 raise ValueError(
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001725 "Can't write to ZIP archive while an open writing handle exists"
1726 )
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001727
Marcel Plcha2fe1e52018-08-02 15:04:52 +02001728 zinfo = ZipInfo.from_file(filename, arcname,
Marcel Plch77b112c2018-08-31 16:43:31 +02001729 strict_timestamps=self._strict_timestamps)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001730
Serhiy Storchaka503f9082016-02-08 00:02:25 +02001731 if zinfo.is_dir():
1732 zinfo.compress_size = 0
1733 zinfo.CRC = 0
1734 else:
1735 if compress_type is not None:
1736 zinfo.compress_type = compress_type
1737 else:
1738 zinfo.compress_type = self.compression
1739
Bo Baylesce237c72018-01-29 23:54:07 -06001740 if compresslevel is not None:
1741 zinfo._compresslevel = compresslevel
1742 else:
1743 zinfo._compresslevel = self.compresslevel
1744
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001745 if zinfo.is_dir():
1746 with self._lock:
1747 if self._seekable:
1748 self.fp.seek(self.start_dir)
1749 zinfo.header_offset = self.fp.tell() # Start of header bytes
1750 if zinfo.compress_type == ZIP_LZMA:
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001751 # Compressed data includes an end-of-stream (EOS) marker
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001752 zinfo.flag_bits |= 0x02
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001753
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001754 self._writecheck(zinfo)
1755 self._didModify = True
Martin v. Löwis59e47792009-01-24 14:10:07 +00001756
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001757 self.filelist.append(zinfo)
1758 self.NameToInfo[zinfo.filename] = zinfo
1759 self.fp.write(zinfo.FileHeader(False))
1760 self.start_dir = self.fp.tell()
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001761 else:
1762 with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
1763 shutil.copyfileobj(src, dest, 1024*8)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001764
Bo Baylesce237c72018-01-29 23:54:07 -06001765 def writestr(self, zinfo_or_arcname, data,
1766 compress_type=None, compresslevel=None):
Guido van Rossum85825dc2007-08-27 17:03:28 +00001767 """Write a file into the archive. The contents is 'data', which
1768 may be either a 'str' or a 'bytes' instance; if it is a 'str',
1769 it is encoded as UTF-8 first.
1770 'zinfo_or_arcname' is either a ZipInfo instance or
Just van Rossumb083cb32002-12-12 12:23:32 +00001771 the name of the file in the archive."""
Guido van Rossum85825dc2007-08-27 17:03:28 +00001772 if isinstance(data, str):
1773 data = data.encode("utf-8")
Just van Rossumb083cb32002-12-12 12:23:32 +00001774 if not isinstance(zinfo_or_arcname, ZipInfo):
1775 zinfo = ZipInfo(filename=zinfo_or_arcname,
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001776 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001777 zinfo.compress_type = self.compression
Bo Baylesce237c72018-01-29 23:54:07 -06001778 zinfo._compresslevel = self.compresslevel
Serhiy Storchaka46a34922014-09-23 22:40:23 +03001779 if zinfo.filename[-1] == '/':
1780 zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x
1781 zinfo.external_attr |= 0x10 # MS-DOS directory flag
1782 else:
1783 zinfo.external_attr = 0o600 << 16 # ?rw-------
Just van Rossumb083cb32002-12-12 12:23:32 +00001784 else:
1785 zinfo = zinfo_or_arcname
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001786
1787 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001788 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001789 "Attempt to write to ZIP archive that was already closed")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001790 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001791 raise ValueError(
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001792 "Can't write to ZIP archive while an open writing handle exists."
1793 )
1794
1795 if compress_type is not None:
1796 zinfo.compress_type = compress_type
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001797
Bo Baylesce237c72018-01-29 23:54:07 -06001798 if compresslevel is not None:
1799 zinfo._compresslevel = compresslevel
1800
Guido van Rossum85825dc2007-08-27 17:03:28 +00001801 zinfo.file_size = len(data) # Uncompressed size
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001802 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001803 with self.open(zinfo, mode='w') as dest:
1804 dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001805
1806 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001807 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001808 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001809
1810 def close(self):
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001811 """Close the file, and for mode 'w', 'x' and 'a' write the ending
Fred Drake484d7352000-10-02 21:14:52 +00001812 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001813 if self.fp is None:
1814 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001815
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001816 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001817 raise ValueError("Can't close the ZIP file while there is "
1818 "an open writing handle on it. "
1819 "Close the writing handle before closing the zip.")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001820
Antoine Pitrou17babc52012-11-17 23:50:08 +01001821 try:
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001822 if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001823 with self._lock:
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001824 if self._seekable:
Serhiy Storchakaa14f7d22015-01-26 14:01:27 +02001825 self.fp.seek(self.start_dir)
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001826 self._write_end_record()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001827 finally:
1828 fp = self.fp
1829 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001830 self._fpclose(fp)
1831
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001832 def _write_end_record(self):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001833 for zinfo in self.filelist: # write central directory
1834 dt = zinfo.date_time
1835 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1836 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1837 extra = []
1838 if zinfo.file_size > ZIP64_LIMIT \
1839 or zinfo.compress_size > ZIP64_LIMIT:
1840 extra.append(zinfo.file_size)
1841 extra.append(zinfo.compress_size)
1842 file_size = 0xffffffff
1843 compress_size = 0xffffffff
1844 else:
1845 file_size = zinfo.file_size
1846 compress_size = zinfo.compress_size
1847
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001848 if zinfo.header_offset > ZIP64_LIMIT:
1849 extra.append(zinfo.header_offset)
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001850 header_offset = 0xffffffff
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001851 else:
1852 header_offset = zinfo.header_offset
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001853
1854 extra_data = zinfo.extra
1855 min_version = 0
1856 if extra:
1857 # Append a ZIP64 field to the extra's
Serhiy Storchaka9bdb7be2018-09-17 15:36:40 +03001858 extra_data = _strip_extra(extra_data, (1,))
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001859 extra_data = struct.pack(
1860 '<HH' + 'Q'*len(extra),
1861 1, 8*len(extra), *extra) + extra_data
1862
1863 min_version = ZIP64_VERSION
1864
1865 if zinfo.compress_type == ZIP_BZIP2:
1866 min_version = max(BZIP2_VERSION, min_version)
1867 elif zinfo.compress_type == ZIP_LZMA:
1868 min_version = max(LZMA_VERSION, min_version)
1869
1870 extract_version = max(min_version, zinfo.extract_version)
1871 create_version = max(min_version, zinfo.create_version)
Victor Stinner1d3b0aa2020-01-17 15:17:48 +01001872 filename, flag_bits = zinfo._encodeFilenameFlags()
1873 centdir = struct.pack(structCentralDir,
1874 stringCentralDir, create_version,
1875 zinfo.create_system, extract_version, zinfo.reserved,
1876 flag_bits, zinfo.compress_type, dostime, dosdate,
1877 zinfo.CRC, compress_size, file_size,
1878 len(filename), len(extra_data), len(zinfo.comment),
1879 0, zinfo.internal_attr, zinfo.external_attr,
1880 header_offset)
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001881 self.fp.write(centdir)
1882 self.fp.write(filename)
1883 self.fp.write(extra_data)
1884 self.fp.write(zinfo.comment)
1885
1886 pos2 = self.fp.tell()
1887 # Write end-of-zip-archive record
1888 centDirCount = len(self.filelist)
1889 centDirSize = pos2 - self.start_dir
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001890 centDirOffset = self.start_dir
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001891 requires_zip64 = None
1892 if centDirCount > ZIP_FILECOUNT_LIMIT:
1893 requires_zip64 = "Files count"
1894 elif centDirOffset > ZIP64_LIMIT:
1895 requires_zip64 = "Central directory offset"
1896 elif centDirSize > ZIP64_LIMIT:
1897 requires_zip64 = "Central directory size"
1898 if requires_zip64:
1899 # Need to write the ZIP64 end-of-archive records
1900 if not self._allowZip64:
1901 raise LargeZipFile(requires_zip64 +
1902 " would require ZIP64 extensions")
1903 zip64endrec = struct.pack(
1904 structEndArchive64, stringEndArchive64,
1905 44, 45, 45, 0, 0, centDirCount, centDirCount,
1906 centDirSize, centDirOffset)
1907 self.fp.write(zip64endrec)
1908
1909 zip64locrec = struct.pack(
1910 structEndArchive64Locator,
1911 stringEndArchive64Locator, 0, pos2, 1)
1912 self.fp.write(zip64locrec)
1913 centDirCount = min(centDirCount, 0xFFFF)
1914 centDirSize = min(centDirSize, 0xFFFFFFFF)
1915 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1916
1917 endrec = struct.pack(structEndArchive, stringEndArchive,
1918 0, 0, centDirCount, centDirCount,
1919 centDirSize, centDirOffset, len(self._comment))
1920 self.fp.write(endrec)
1921 self.fp.write(self._comment)
Jan Mazurff9147d2020-09-28 20:53:33 +02001922 if self.mode == "a":
1923 self.fp.truncate()
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001924 self.fp.flush()
1925
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001926 def _fpclose(self, fp):
1927 assert self._fileRefCnt > 0
1928 self._fileRefCnt -= 1
1929 if not self._fileRefCnt and not self._filePassed:
1930 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001931
1932
1933class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001934 """Class to create ZIP archives with Python library files and packages."""
1935
Georg Brandl8334fd92010-12-04 10:26:46 +00001936 def __init__(self, file, mode="r", compression=ZIP_STORED,
Serhiy Storchaka235c5e02013-11-23 15:55:38 +02001937 allowZip64=True, optimize=-1):
Georg Brandl8334fd92010-12-04 10:26:46 +00001938 ZipFile.__init__(self, file, mode=mode, compression=compression,
1939 allowZip64=allowZip64)
1940 self._optimize = optimize
1941
Christian Tismer59202e52013-10-21 03:59:23 +02001942 def writepy(self, pathname, basename="", filterfunc=None):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001943 """Add all files from "pathname" to the ZIP archive.
1944
Fred Drake484d7352000-10-02 21:14:52 +00001945 If pathname is a package directory, search the directory and
1946 all package subdirectories recursively for all *.py and enter
1947 the modules into the archive. If pathname is a plain
1948 directory, listdir *.py and enter all modules. Else, pathname
1949 must be a Python *.py file and the module will be put into the
Brett Cannonf299abd2015-04-13 14:21:02 -04001950 archive. Added modules are always module.pyc.
Fred Drake484d7352000-10-02 21:14:52 +00001951 This method will compile the module.py into module.pyc if
1952 necessary.
Christian Tismer59202e52013-10-21 03:59:23 +02001953 If filterfunc(pathname) is given, it is called with every argument.
1954 When it is False, the file or directory is skipped.
Fred Drake484d7352000-10-02 21:14:52 +00001955 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001956 pathname = os.fspath(pathname)
Christian Tismer59202e52013-10-21 03:59:23 +02001957 if filterfunc and not filterfunc(pathname):
1958 if self.debug:
Christian Tismer410d9312013-10-22 04:09:28 +02001959 label = 'path' if os.path.isdir(pathname) else 'file'
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001960 print('%s %r skipped by filterfunc' % (label, pathname))
Christian Tismer59202e52013-10-21 03:59:23 +02001961 return
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001962 dir, name = os.path.split(pathname)
1963 if os.path.isdir(pathname):
1964 initname = os.path.join(pathname, "__init__.py")
1965 if os.path.isfile(initname):
1966 # This is a package directory, add it
1967 if basename:
1968 basename = "%s/%s" % (basename, name)
1969 else:
1970 basename = name
1971 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001972 print("Adding package in", pathname, "as", basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001973 fname, arcname = self._get_codename(initname[0:-3], basename)
1974 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001975 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001976 self.write(fname, arcname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01001977 dirlist = sorted(os.listdir(pathname))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001978 dirlist.remove("__init__.py")
1979 # Add all *.py files and package subdirectories
1980 for filename in dirlist:
1981 path = os.path.join(pathname, filename)
1982 root, ext = os.path.splitext(filename)
1983 if os.path.isdir(path):
1984 if os.path.isfile(os.path.join(path, "__init__.py")):
1985 # This is a package directory, add it
Christian Tismer59202e52013-10-21 03:59:23 +02001986 self.writepy(path, basename,
1987 filterfunc=filterfunc) # Recursive call
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001988 elif ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02001989 if filterfunc and not filterfunc(path):
1990 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001991 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02001992 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001993 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02001994 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001995 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001996 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001997 self.write(fname, arcname)
1998 else:
1999 # This is NOT a package directory, add its files at top level
2000 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002001 print("Adding files from directory", pathname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01002002 for filename in sorted(os.listdir(pathname)):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002003 path = os.path.join(pathname, filename)
2004 root, ext = os.path.splitext(filename)
2005 if ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02002006 if filterfunc and not filterfunc(path):
2007 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03002008 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02002009 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002010 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02002011 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002012 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002013 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002014 self.write(fname, arcname)
2015 else:
2016 if pathname[-3:] != ".py":
Collin Winterce36ad82007-08-30 01:19:48 +00002017 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02002018 'Files added with writepy() must end with ".py"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002019 fname, arcname = self._get_codename(pathname[0:-3], basename)
2020 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002021 print("Adding file", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002022 self.write(fname, arcname)
2023
2024 def _get_codename(self, pathname, basename):
2025 """Return (filename, archivename) for the path.
2026
Fred Drake484d7352000-10-02 21:14:52 +00002027 Given a module name path, return the correct file path and
2028 archive name, compiling if necessary. For example, given
2029 /python/lib/string, return (/python/lib/string.pyc, string).
2030 """
Georg Brandl8334fd92010-12-04 10:26:46 +00002031 def _compile(file, optimize=-1):
2032 import py_compile
2033 if self.debug:
2034 print("Compiling", file)
2035 try:
2036 py_compile.compile(file, doraise=True, optimize=optimize)
Serhiy Storchaka45c43752013-01-29 20:10:28 +02002037 except py_compile.PyCompileError as err:
Georg Brandl8334fd92010-12-04 10:26:46 +00002038 print(err.msg)
2039 return False
2040 return True
2041
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002042 file_py = pathname + ".py"
2043 file_pyc = pathname + ".pyc"
Brett Cannonf299abd2015-04-13 14:21:02 -04002044 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2045 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2046 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
Georg Brandl8334fd92010-12-04 10:26:46 +00002047 if self._optimize == -1:
2048 # legacy mode: use whatever file is present
Brett Cannonf299abd2015-04-13 14:21:02 -04002049 if (os.path.isfile(file_pyc) and
Georg Brandl8334fd92010-12-04 10:26:46 +00002050 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2051 # Use .pyc file.
2052 arcname = fname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002053 elif (os.path.isfile(pycache_opt0) and
2054 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
Georg Brandl8334fd92010-12-04 10:26:46 +00002055 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2056 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002057 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002058 arcname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002059 elif (os.path.isfile(pycache_opt1) and
2060 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2061 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002062 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002063 fname = pycache_opt1
2064 arcname = file_pyc
2065 elif (os.path.isfile(pycache_opt2) and
2066 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2067 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2068 # file name in the archive.
2069 fname = pycache_opt2
2070 arcname = file_pyc
Barry Warsaw28a691b2010-04-17 00:19:56 +00002071 else:
Georg Brandl8334fd92010-12-04 10:26:46 +00002072 # Compile py into PEP 3147 pyc file.
2073 if _compile(file_py):
Brett Cannonf299abd2015-04-13 14:21:02 -04002074 if sys.flags.optimize == 0:
2075 fname = pycache_opt0
2076 elif sys.flags.optimize == 1:
2077 fname = pycache_opt1
2078 else:
2079 fname = pycache_opt2
2080 arcname = file_pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002081 else:
2082 fname = arcname = file_py
2083 else:
2084 # new mode: use given optimization level
2085 if self._optimize == 0:
Brett Cannonf299abd2015-04-13 14:21:02 -04002086 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002087 arcname = file_pyc
2088 else:
Brett Cannonf299abd2015-04-13 14:21:02 -04002089 arcname = file_pyc
2090 if self._optimize == 1:
2091 fname = pycache_opt1
2092 elif self._optimize == 2:
2093 fname = pycache_opt2
2094 else:
2095 msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2096 raise ValueError(msg)
Georg Brandl8334fd92010-12-04 10:26:46 +00002097 if not (os.path.isfile(fname) and
2098 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2099 if not _compile(file_py, optimize=self._optimize):
2100 fname = arcname = file_py
Barry Warsaw28a691b2010-04-17 00:19:56 +00002101 archivename = os.path.split(arcname)[1]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002102 if basename:
2103 archivename = "%s/%s" % (basename, archivename)
2104 return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002105
2106
shireenraoa4e29912019-08-24 11:26:41 -04002107def _parents(path):
2108 """
2109 Given a path with elements separated by
2110 posixpath.sep, generate all parents of that path.
2111
2112 >>> list(_parents('b/d'))
2113 ['b']
2114 >>> list(_parents('/b/d/'))
2115 ['/b']
2116 >>> list(_parents('b/d/f/'))
2117 ['b/d', 'b']
2118 >>> list(_parents('b'))
2119 []
2120 >>> list(_parents(''))
2121 []
2122 """
2123 return itertools.islice(_ancestry(path), 1, None)
2124
2125
2126def _ancestry(path):
2127 """
2128 Given a path with elements separated by
2129 posixpath.sep, generate all elements of that path
2130
2131 >>> list(_ancestry('b/d'))
2132 ['b/d', 'b']
2133 >>> list(_ancestry('/b/d/'))
2134 ['/b/d', '/b']
2135 >>> list(_ancestry('b/d/f/'))
2136 ['b/d/f', 'b/d', 'b']
2137 >>> list(_ancestry('b'))
2138 ['b']
2139 >>> list(_ancestry(''))
2140 []
2141 """
2142 path = path.rstrip(posixpath.sep)
2143 while path and path != posixpath.sep:
2144 yield path
2145 path, tail = posixpath.split(path)
2146
2147
Jason R. Coombs0aeab5c2020-02-29 10:34:11 -06002148_dedupe = dict.fromkeys
2149"""Deduplicate an iterable in original order"""
2150
2151
2152def _difference(minuend, subtrahend):
2153 """
2154 Return items in minuend not in subtrahend, retaining order
2155 with O(1) lookup.
2156 """
2157 return itertools.filterfalse(set(subtrahend).__contains__, minuend)
2158
2159
Jason R. Coombse5bd7362020-02-11 21:58:47 -05002160class CompleteDirs(ZipFile):
2161 """
2162 A ZipFile subclass that ensures that implied directories
2163 are always included in the namelist.
2164 """
2165
2166 @staticmethod
2167 def _implied_dirs(names):
2168 parents = itertools.chain.from_iterable(map(_parents, names))
Jason R. Coombs0aeab5c2020-02-29 10:34:11 -06002169 as_dirs = (p + posixpath.sep for p in parents)
2170 return _dedupe(_difference(as_dirs, names))
Jason R. Coombse5bd7362020-02-11 21:58:47 -05002171
2172 def namelist(self):
2173 names = super(CompleteDirs, self).namelist()
2174 return names + list(self._implied_dirs(names))
2175
2176 def _name_set(self):
2177 return set(self.namelist())
2178
2179 def resolve_dir(self, name):
2180 """
2181 If the name represents a directory, return that name
2182 as a directory (with the trailing slash).
2183 """
2184 names = self._name_set()
2185 dirname = name + '/'
2186 dir_match = name not in names and dirname in names
2187 return dirname if dir_match else name
2188
2189 @classmethod
2190 def make(cls, source):
2191 """
2192 Given a source (filename or zipfile), return an
2193 appropriate CompleteDirs subclass.
2194 """
2195 if isinstance(source, CompleteDirs):
2196 return source
2197
2198 if not isinstance(source, ZipFile):
2199 return cls(source)
2200
Jason R. Coombsebbe8032020-10-03 10:58:39 -04002201 # Only allow for FastLookup when supplied zipfile is read-only
Jason R. Coombse5bd7362020-02-11 21:58:47 -05002202 if 'r' not in source.mode:
2203 cls = CompleteDirs
2204
Jason R. Coombsebbe8032020-10-03 10:58:39 -04002205 source.__class__ = cls
2206 return source
Jason R. Coombse5bd7362020-02-11 21:58:47 -05002207
2208
2209class FastLookup(CompleteDirs):
2210 """
2211 ZipFile subclass to ensure implicit
2212 dirs exist and are resolved rapidly.
2213 """
Jason R. Coombsd1a0a962020-10-25 14:45:05 -04002214
Jason R. Coombse5bd7362020-02-11 21:58:47 -05002215 def namelist(self):
2216 with contextlib.suppress(AttributeError):
2217 return self.__names
2218 self.__names = super(FastLookup, self).namelist()
2219 return self.__names
2220
2221 def _name_set(self):
2222 with contextlib.suppress(AttributeError):
2223 return self.__lookup
2224 self.__lookup = super(FastLookup, self)._name_set()
2225 return self.__lookup
2226
2227
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04002228class Path:
2229 """
2230 A pathlib-compatible interface for zip files.
2231
2232 Consider a zip file with this structure::
2233
2234 .
2235 ├── a.txt
2236 └── b
2237 ├── c.txt
2238 └── d
2239 └── e.txt
2240
2241 >>> data = io.BytesIO()
2242 >>> zf = ZipFile(data, 'w')
2243 >>> zf.writestr('a.txt', 'content of a')
2244 >>> zf.writestr('b/c.txt', 'content of c')
2245 >>> zf.writestr('b/d/e.txt', 'content of e')
Jason R. Coombsd1a0a962020-10-25 14:45:05 -04002246 >>> zf.filename = 'mem/abcde.zip'
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04002247
2248 Path accepts the zipfile object itself or a filename
2249
2250 >>> root = Path(zf)
2251
2252 From there, several path operations are available.
2253
2254 Directory iteration (including the zip file itself):
2255
2256 >>> a, b = root.iterdir()
2257 >>> a
Jason R. Coombsd1a0a962020-10-25 14:45:05 -04002258 Path('mem/abcde.zip', 'a.txt')
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04002259 >>> b
Jason R. Coombsd1a0a962020-10-25 14:45:05 -04002260 Path('mem/abcde.zip', 'b/')
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04002261
2262 name property:
2263
2264 >>> b.name
2265 'b'
2266
2267 join with divide operator:
2268
2269 >>> c = b / 'c.txt'
2270 >>> c
Jason R. Coombsd1a0a962020-10-25 14:45:05 -04002271 Path('mem/abcde.zip', 'b/c.txt')
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04002272 >>> c.name
2273 'c.txt'
2274
2275 Read text:
2276
2277 >>> c.read_text()
2278 'content of c'
2279
2280 existence:
2281
2282 >>> c.exists()
2283 True
2284 >>> (b / 'missing.txt').exists()
2285 False
2286
Xtreak0d702272019-06-03 04:42:33 +05302287 Coercion to string:
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04002288
Jason R. Coombsd1a0a962020-10-25 14:45:05 -04002289 >>> import os
2290 >>> str(c).replace(os.sep, posixpath.sep)
2291 'mem/abcde.zip/b/c.txt'
2292
2293 At the root, ``name``, ``filename``, and ``parent``
2294 resolve to the zipfile. Note these attributes are not
2295 valid and will raise a ``ValueError`` if the zipfile
2296 has no filename.
2297
2298 >>> root.name
2299 'abcde.zip'
2300 >>> str(root.filename).replace(os.sep, posixpath.sep)
2301 'mem/abcde.zip'
2302 >>> str(root.parent)
2303 'mem'
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04002304 """
2305
2306 __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
2307
2308 def __init__(self, root, at=""):
Jason R. Coombsebbe8032020-10-03 10:58:39 -04002309 """
2310 Construct a Path from a ZipFile or filename.
2311
2312 Note: When the source is an existing ZipFile object,
2313 its type (__class__) will be mutated to a
2314 specialized type. If the caller wishes to retain the
2315 original type, the caller should either create a
2316 separate ZipFile object or pass a filename.
2317 """
Jason R. Coombse5bd7362020-02-11 21:58:47 -05002318 self.root = FastLookup.make(root)
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04002319 self.at = at
2320
Jason R. Coombsebbe8032020-10-03 10:58:39 -04002321 def open(self, mode='r', *args, pwd=None, **kwargs):
Jason R. Coombs0aeab5c2020-02-29 10:34:11 -06002322 """
2323 Open this entry as text or binary following the semantics
2324 of ``pathlib.Path.open()`` by passing arguments through
2325 to io.TextIOWrapper().
2326 """
Jason R. Coombsebbe8032020-10-03 10:58:39 -04002327 if self.is_dir():
2328 raise IsADirectoryError(self)
Jason R. Coombs0aeab5c2020-02-29 10:34:11 -06002329 zip_mode = mode[0]
Jason R. Coombsebbe8032020-10-03 10:58:39 -04002330 if not self.exists() and zip_mode == 'r':
2331 raise FileNotFoundError(self)
Jason R. Coombs0aeab5c2020-02-29 10:34:11 -06002332 stream = self.root.open(self.at, zip_mode, pwd=pwd)
2333 if 'b' in mode:
2334 if args or kwargs:
2335 raise ValueError("encoding args invalid for binary operation")
2336 return stream
2337 return io.TextIOWrapper(stream, *args, **kwargs)
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04002338
2339 @property
2340 def name(self):
Jason R. Coombsd1a0a962020-10-25 14:45:05 -04002341 return pathlib.Path(self.at).name or self.filename.name
2342
2343 @property
2344 def filename(self):
2345 return pathlib.Path(self.root.filename).joinpath(self.at)
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04002346
2347 def read_text(self, *args, **kwargs):
Jason R. Coombs0aeab5c2020-02-29 10:34:11 -06002348 with self.open('r', *args, **kwargs) as strm:
2349 return strm.read()
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04002350
2351 def read_bytes(self):
Jason R. Coombs0aeab5c2020-02-29 10:34:11 -06002352 with self.open('rb') as strm:
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04002353 return strm.read()
2354
2355 def _is_child(self, path):
2356 return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
2357
2358 def _next(self, at):
Jason R. Coombsd1a0a962020-10-25 14:45:05 -04002359 return self.__class__(self.root, at)
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04002360
2361 def is_dir(self):
2362 return not self.at or self.at.endswith("/")
2363
2364 def is_file(self):
Jason R. Coombsd1a0a962020-10-25 14:45:05 -04002365 return self.exists() and not self.is_dir()
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04002366
2367 def exists(self):
Jason R. Coombse5bd7362020-02-11 21:58:47 -05002368 return self.at in self.root._name_set()
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04002369
2370 def iterdir(self):
2371 if not self.is_dir():
2372 raise ValueError("Can't listdir a file")
Jason R. Coombse5bd7362020-02-11 21:58:47 -05002373 subs = map(self._next, self.root.namelist())
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04002374 return filter(self._is_child, subs)
2375
2376 def __str__(self):
2377 return posixpath.join(self.root.filename, self.at)
2378
2379 def __repr__(self):
2380 return self.__repr.format(self=self)
2381
Jason R. Coombs928dbfc2020-12-15 21:12:54 -05002382 def joinpath(self, *other):
2383 next = posixpath.join(self.at, *other)
Jason R. Coombse5bd7362020-02-11 21:58:47 -05002384 return self._next(self.root.resolve_dir(next))
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04002385
Jason R. Coombs33e067d2019-05-09 11:34:36 -04002386 __truediv__ = joinpath
2387
Jason R. Coombs33e067d2019-05-09 11:34:36 -04002388 @property
2389 def parent(self):
Jason R. Coombsd1a0a962020-10-25 14:45:05 -04002390 if not self.at:
2391 return self.filename.parent
Jason R. Coombs38f44b42019-07-07 17:37:50 -04002392 parent_at = posixpath.dirname(self.at.rstrip('/'))
Jason R. Coombs33e067d2019-05-09 11:34:36 -04002393 if parent_at:
2394 parent_at += '/'
2395 return self._next(parent_at)
2396
Jason R. Coombsb2758ff2019-05-08 09:45:06 -04002397
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002398def main(args=None):
2399 import argparse
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002400
Serhiy Storchaka150cd192017-04-07 18:56:12 +03002401 description = 'A simple command-line interface for zipfile module.'
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002402 parser = argparse.ArgumentParser(description=description)
Serhiy Storchaka150cd192017-04-07 18:56:12 +03002403 group = parser.add_mutually_exclusive_group(required=True)
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002404 group.add_argument('-l', '--list', metavar='<zipfile>',
2405 help='Show listing of a zipfile')
2406 group.add_argument('-e', '--extract', nargs=2,
2407 metavar=('<zipfile>', '<output_dir>'),
2408 help='Extract zipfile into target dir')
2409 group.add_argument('-c', '--create', nargs='+',
2410 metavar=('<name>', '<file>'),
2411 help='Create zipfile from sources')
2412 group.add_argument('-t', '--test', metavar='<zipfile>',
2413 help='Test if a zipfile is valid')
2414 args = parser.parse_args(args)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002415
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002416 if args.test is not None:
2417 src = args.test
2418 with ZipFile(src, 'r') as zf:
Antoine Pitrou17babc52012-11-17 23:50:08 +01002419 badfile = zf.testzip()
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +00002420 if badfile:
2421 print("The following enclosed file is corrupted: {!r}".format(badfile))
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002422 print("Done testing")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002423
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002424 elif args.list is not None:
2425 src = args.list
2426 with ZipFile(src, 'r') as zf:
2427 zf.printdir()
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002428
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002429 elif args.extract is not None:
2430 src, curdir = args.extract
2431 with ZipFile(src, 'r') as zf:
2432 zf.extractall(curdir)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002433
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002434 elif args.create is not None:
2435 zip_name = args.create.pop(0)
2436 files = args.create
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002437
2438 def addToZip(zf, path, zippath):
2439 if os.path.isfile(path):
2440 zf.write(path, zippath, ZIP_DEFLATED)
2441 elif os.path.isdir(path):
Serhiy Storchaka518e71b2014-10-04 13:39:34 +03002442 if zippath:
2443 zf.write(path, zippath)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01002444 for nm in sorted(os.listdir(path)):
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002445 addToZip(zf,
Christian Tismer59202e52013-10-21 03:59:23 +02002446 os.path.join(path, nm), os.path.join(zippath, nm))
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002447 # else: ignore
2448
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002449 with ZipFile(zip_name, 'w') as zf:
2450 for path in files:
Serhiy Storchaka518e71b2014-10-04 13:39:34 +03002451 zippath = os.path.basename(path)
2452 if not zippath:
2453 zippath = os.path.basename(os.path.dirname(path))
2454 if zippath in ('', os.curdir, os.pardir):
2455 zippath = ''
2456 addToZip(zf, path, zippath)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002457
Jason R. Coombse5bd7362020-02-11 21:58:47 -05002458
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002459if __name__ == "__main__":
2460 main()