blob: 9f88512d9838b19cf946db3d9fe3037eba29f372 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Brett Cannonb57a0852013-06-15 17:32:30 -04008import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import sys
10import time
11import stat
12import shutil
13import struct
14import binascii
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020015import threading
Guido van Rossum32abe6f2000-03-31 17:30:02 +000016
17try:
Tim Peterse1190062001-01-15 03:34:38 +000018 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000019 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040020except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000021 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000022 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000023
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020024try:
25 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040026except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020027 bz2 = None
28
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020029try:
30 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040031except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020032 lzma = None
33
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020034__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020035 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Georg Brandl4d540882010-10-28 06:42:33 +000036 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000037
Georg Brandl4d540882010-10-28 06:42:33 +000038class BadZipFile(Exception):
Guido van Rossum32abe6f2000-03-31 17:30:02 +000039 pass
Thomas Wouters0e3f5912006-08-11 14:57:12 +000040
41
42class LargeZipFile(Exception):
43 """
44 Raised when writing a zipfile, the zipfile requires ZIP64 extensions
45 and those extensions are disabled.
46 """
47
Georg Brandl4d540882010-10-28 06:42:33 +000048error = BadZipfile = BadZipFile # Pre-3.2 compatibility names
49
Guido van Rossum32abe6f2000-03-31 17:30:02 +000050
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +000051ZIP64_LIMIT = (1 << 31) - 1
Serhiy Storchakacfbb3942014-09-23 21:34:24 +030052ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +000053ZIP_MAX_COMMENT = (1 << 16) - 1
Thomas Wouters0e3f5912006-08-11 14:57:12 +000054
Guido van Rossum32abe6f2000-03-31 17:30:02 +000055# constants for Zip file compression methods
56ZIP_STORED = 0
57ZIP_DEFLATED = 8
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020058ZIP_BZIP2 = 12
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020059ZIP_LZMA = 14
Guido van Rossum32abe6f2000-03-31 17:30:02 +000060# Other ZIP compression methods not supported
61
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020062DEFAULT_VERSION = 20
63ZIP64_VERSION = 45
64BZIP2_VERSION = 46
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020065LZMA_VERSION = 63
Martin v. Löwisd099b562012-05-01 14:08:22 +020066# we recognize (but not necessarily support) all features up to that version
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020067MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020068
Martin v. Löwisb09b8442008-07-03 14:13:42 +000069# Below are some formats and associated data for reading/writing headers using
70# the struct module. The names and structures of headers/records are those used
71# in the PKWARE description of the ZIP file format:
72# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
73# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000074
Martin v. Löwisb09b8442008-07-03 14:13:42 +000075# The "end of central directory" structure, magic number, size, and indices
76# (section V.I in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +000077structEndArchive = b"<4s4H2LH"
78stringEndArchive = b"PK\005\006"
79sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +000080
81_ECD_SIGNATURE = 0
82_ECD_DISK_NUMBER = 1
83_ECD_DISK_START = 2
84_ECD_ENTRIES_THIS_DISK = 3
85_ECD_ENTRIES_TOTAL = 4
86_ECD_SIZE = 5
87_ECD_OFFSET = 6
88_ECD_COMMENT_SIZE = 7
89# These last two indices are not part of the structure as defined in the
90# spec, but they are used internally by this module as a convenience
91_ECD_COMMENT = 8
92_ECD_LOCATION = 9
93
94# The "central directory" structure, magic number, size, and indices
95# of entries in the structure (section V.F in the format document)
96structCentralDir = "<4s4B4HL2L5H2L"
Georg Brandl2ee470f2008-07-16 12:55:28 +000097stringCentralDir = b"PK\001\002"
Martin v. Löwisb09b8442008-07-03 14:13:42 +000098sizeCentralDir = struct.calcsize(structCentralDir)
99
Fred Drake3e038e52001-02-28 17:56:26 +0000100# indexes of entries in the central directory structure
101_CD_SIGNATURE = 0
102_CD_CREATE_VERSION = 1
103_CD_CREATE_SYSTEM = 2
104_CD_EXTRACT_VERSION = 3
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000105_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +0000106_CD_FLAG_BITS = 5
107_CD_COMPRESS_TYPE = 6
108_CD_TIME = 7
109_CD_DATE = 8
110_CD_CRC = 9
111_CD_COMPRESSED_SIZE = 10
112_CD_UNCOMPRESSED_SIZE = 11
113_CD_FILENAME_LENGTH = 12
114_CD_EXTRA_FIELD_LENGTH = 13
115_CD_COMMENT_LENGTH = 14
116_CD_DISK_NUMBER_START = 15
117_CD_INTERNAL_FILE_ATTRIBUTES = 16
118_CD_EXTERNAL_FILE_ATTRIBUTES = 17
119_CD_LOCAL_HEADER_OFFSET = 18
120
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000121# The "local file header" structure, magic number, size, and indices
122# (section V.A in the format document)
123structFileHeader = "<4s2B4HL2L2H"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000124stringFileHeader = b"PK\003\004"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000125sizeFileHeader = struct.calcsize(structFileHeader)
126
Fred Drake3e038e52001-02-28 17:56:26 +0000127_FH_SIGNATURE = 0
128_FH_EXTRACT_VERSION = 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000129_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000130_FH_GENERAL_PURPOSE_FLAG_BITS = 3
131_FH_COMPRESSION_METHOD = 4
132_FH_LAST_MOD_TIME = 5
133_FH_LAST_MOD_DATE = 6
134_FH_CRC = 7
135_FH_COMPRESSED_SIZE = 8
136_FH_UNCOMPRESSED_SIZE = 9
137_FH_FILENAME_LENGTH = 10
138_FH_EXTRA_FIELD_LENGTH = 11
139
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000140# The "Zip64 end of central directory locator" structure, magic number, and size
Georg Brandl2ee470f2008-07-16 12:55:28 +0000141structEndArchive64Locator = "<4sLQL"
142stringEndArchive64Locator = b"PK\x06\x07"
143sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000144
145# The "Zip64 end of central directory" record, magic number, size, and indices
146# (section V.G in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000147structEndArchive64 = "<4sQ2H2L4Q"
148stringEndArchive64 = b"PK\x06\x06"
149sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000150
151_CD64_SIGNATURE = 0
152_CD64_DIRECTORY_RECSIZE = 1
153_CD64_CREATE_VERSION = 2
154_CD64_EXTRACT_VERSION = 3
155_CD64_DISK_NUMBER = 4
156_CD64_DISK_NUMBER_START = 5
157_CD64_NUMBER_ENTRIES_THIS_DISK = 6
158_CD64_NUMBER_ENTRIES_TOTAL = 7
159_CD64_DIRECTORY_SIZE = 8
160_CD64_OFFSET_START_CENTDIR = 9
161
Miss Islington (bot)efdf3162018-09-17 06:08:45 -0700162_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
163
164def _strip_extra(extra, xids):
165 # Remove Extra Fields with specified IDs.
166 unpack = _EXTRA_FIELD_STRUCT.unpack
167 modified = False
168 buffer = []
169 start = i = 0
170 while i + 4 <= len(extra):
171 xid, xlen = unpack(extra[i : i + 4])
172 j = i + 4 + xlen
173 if xid in xids:
174 if i != start:
175 buffer.append(extra[start : i])
176 start = j
177 modified = True
178 i = j
179 if not modified:
180 return extra
181 return b''.join(buffer)
182
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000183def _check_zipfile(fp):
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000184 try:
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000185 if _EndRecData(fp):
186 return True # file has correct magic number
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200187 except OSError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000188 pass
Guido van Rossum8ca162f2002-04-07 06:36:23 +0000189 return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000190
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000191def is_zipfile(filename):
192 """Quickly see if a file is a ZIP file by checking the magic number.
193
194 The filename argument may be a file or file-like object too.
195 """
196 result = False
197 try:
198 if hasattr(filename, "read"):
199 result = _check_zipfile(fp=filename)
200 else:
201 with open(filename, "rb") as fp:
202 result = _check_zipfile(fp)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200203 except OSError:
Antoine Pitroudb5fe662008-12-27 15:50:40 +0000204 pass
205 return result
206
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000207def _EndRecData64(fpin, offset, endrec):
208 """
209 Read the ZIP64 end-of-archive records and use that to update endrec
210 """
Georg Brandl268e4d42010-10-14 06:59:45 +0000211 try:
212 fpin.seek(offset - sizeEndCentDir64Locator, 2)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200213 except OSError:
Georg Brandl268e4d42010-10-14 06:59:45 +0000214 # If the seek fails, the file is not large enough to contain a ZIP64
215 # end-of-archive record, so just return the end record we were given.
216 return endrec
217
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000218 data = fpin.read(sizeEndCentDir64Locator)
Serhiy Storchakad2b15272013-01-31 15:27:07 +0200219 if len(data) != sizeEndCentDir64Locator:
220 return endrec
Georg Brandl2ee470f2008-07-16 12:55:28 +0000221 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
222 if sig != stringEndArchive64Locator:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000223 return endrec
224
225 if diskno != 0 or disks != 1:
Éric Araujoae2d8322010-10-28 13:49:17 +0000226 raise BadZipFile("zipfiles that span multiple disks are not supported")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000227
228 # Assume no 'zip64 extensible data'
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000229 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
230 data = fpin.read(sizeEndCentDir64)
Serhiy Storchakad2b15272013-01-31 15:27:07 +0200231 if len(data) != sizeEndCentDir64:
232 return endrec
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000233 sig, sz, create_version, read_version, disk_num, disk_dir, \
Christian Tismer59202e52013-10-21 03:59:23 +0200234 dircount, dircount2, dirsize, diroffset = \
235 struct.unpack(structEndArchive64, data)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000236 if sig != stringEndArchive64:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000237 return endrec
238
239 # Update the original endrec using data from the ZIP64 record
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +0000240 endrec[_ECD_SIGNATURE] = sig
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000241 endrec[_ECD_DISK_NUMBER] = disk_num
242 endrec[_ECD_DISK_START] = disk_dir
243 endrec[_ECD_ENTRIES_THIS_DISK] = dircount
244 endrec[_ECD_ENTRIES_TOTAL] = dircount2
245 endrec[_ECD_SIZE] = dirsize
246 endrec[_ECD_OFFSET] = diroffset
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000247 return endrec
248
249
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000250def _EndRecData(fpin):
251 """Return data from the "End of Central Directory" record, or None.
252
253 The data is a list of the nine items in the ZIP "End of central dir"
254 record followed by a tenth item, the file seek offset of this record."""
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000255
256 # Determine file size
257 fpin.seek(0, 2)
258 filesize = fpin.tell()
259
260 # Check to see if this is ZIP file with no archive comment (the
261 # "end of central directory" structure should be the last item in the
262 # file if this is the case).
Amaury Forgeot d'Arcbc347802009-07-28 22:18:57 +0000263 try:
264 fpin.seek(-sizeEndCentDir, 2)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200265 except OSError:
Amaury Forgeot d'Arcbc347802009-07-28 22:18:57 +0000266 return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000267 data = fpin.read()
Serhiy Storchakad2b15272013-01-31 15:27:07 +0200268 if (len(data) == sizeEndCentDir and
269 data[0:4] == stringEndArchive and
270 data[-2:] == b"\000\000"):
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000271 # the signature is correct and there's no comment, unpack structure
Georg Brandl2ee470f2008-07-16 12:55:28 +0000272 endrec = struct.unpack(structEndArchive, data)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000273 endrec=list(endrec)
274
275 # Append a blank comment and record start offset
276 endrec.append(b"")
277 endrec.append(filesize - sizeEndCentDir)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000278
Amaury Forgeot d'Arcd3fb4bb2009-01-18 00:29:02 +0000279 # Try to read the "Zip64 end of central directory" structure
280 return _EndRecData64(fpin, -sizeEndCentDir, endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000281
282 # Either this is not a ZIP file, or it is a ZIP file with an archive
283 # comment. Search the end of the file for the "end of central directory"
284 # record signature. The comment is the last item in the ZIP file and may be
285 # up to 64K long. It is assumed that the "end of central directory" magic
286 # number does not appear in the comment.
287 maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
288 fpin.seek(maxCommentStart, 0)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000289 data = fpin.read()
Georg Brandl2ee470f2008-07-16 12:55:28 +0000290 start = data.rfind(stringEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000291 if start >= 0:
292 # found the magic number; attempt to unpack and interpret
293 recData = data[start:start+sizeEndCentDir]
Serhiy Storchakad2b15272013-01-31 15:27:07 +0200294 if len(recData) != sizeEndCentDir:
295 # Zip file is corrupted.
296 return None
Georg Brandl2ee470f2008-07-16 12:55:28 +0000297 endrec = list(struct.unpack(structEndArchive, recData))
R David Murray4fbb9db2011-06-09 15:50:51 -0400298 commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
299 comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
300 endrec.append(comment)
301 endrec.append(maxCommentStart + start)
Amaury Forgeot d'Arcd3fb4bb2009-01-18 00:29:02 +0000302
R David Murray4fbb9db2011-06-09 15:50:51 -0400303 # Try to read the "Zip64 end of central directory" structure
304 return _EndRecData64(fpin, maxCommentStart + start - filesize,
305 endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000306
307 # Unable to find a valid end of central directory structure
Serhiy Storchakad2b15272013-01-31 15:27:07 +0200308 return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000309
Fred Drake484d7352000-10-02 21:14:52 +0000310
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000311class ZipInfo (object):
Fred Drake484d7352000-10-02 21:14:52 +0000312 """Class with attributes describing each file in the ZIP archive."""
313
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000314 __slots__ = (
Christian Tismer59202e52013-10-21 03:59:23 +0200315 'orig_filename',
316 'filename',
317 'date_time',
318 'compress_type',
Bo Baylesce237c72018-01-29 23:54:07 -0600319 '_compresslevel',
Christian Tismer59202e52013-10-21 03:59:23 +0200320 'comment',
321 'extra',
322 'create_system',
323 'create_version',
324 'extract_version',
325 'reserved',
326 'flag_bits',
327 'volume',
328 'internal_attr',
329 'external_attr',
330 'header_offset',
331 'CRC',
332 'compress_size',
333 'file_size',
334 '_raw_time',
335 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000336
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000337 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
Greg Ward8e36d282003-06-18 00:53:06 +0000338 self.orig_filename = filename # Original file name in archive
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000339
340 # Terminate the file name at the first null byte. Null bytes in file
341 # names are used as tricks by viruses in archives.
Greg Ward8e36d282003-06-18 00:53:06 +0000342 null_byte = filename.find(chr(0))
343 if null_byte >= 0:
344 filename = filename[0:null_byte]
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000345 # This is used to ensure paths in generated ZIP files always use
346 # forward slashes as the directory separator, as required by the
347 # ZIP format specification.
348 if os.sep != "/" and os.sep in filename:
Greg Ward8e36d282003-06-18 00:53:06 +0000349 filename = filename.replace(os.sep, "/")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000350
Greg Ward8e36d282003-06-18 00:53:06 +0000351 self.filename = filename # Normalized file name
Tim Peterse1190062001-01-15 03:34:38 +0000352 self.date_time = date_time # year, month, day, hour, min, sec
Senthil Kumaran29fa9d42011-10-20 01:46:00 +0800353
354 if date_time[0] < 1980:
355 raise ValueError('ZIP does not support timestamps before 1980')
356
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000357 # Standard values:
Tim Peterse1190062001-01-15 03:34:38 +0000358 self.compress_type = ZIP_STORED # Type of compression for the file
Bo Baylesce237c72018-01-29 23:54:07 -0600359 self._compresslevel = None # Level for the compressor
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000360 self.comment = b"" # Comment for each file
361 self.extra = b"" # ZIP extra data
Martin v. Löwis00756902006-02-05 17:09:41 +0000362 if sys.platform == 'win32':
363 self.create_system = 0 # System which created ZIP archive
364 else:
365 # Assume everything else is unix-y
366 self.create_system = 3 # System which created ZIP archive
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200367 self.create_version = DEFAULT_VERSION # Version which created ZIP archive
368 self.extract_version = DEFAULT_VERSION # Version needed to extract archive
Tim Peterse1190062001-01-15 03:34:38 +0000369 self.reserved = 0 # Must be zero
370 self.flag_bits = 0 # ZIP flag bits
371 self.volume = 0 # Volume number of file header
372 self.internal_attr = 0 # Internal attributes
373 self.external_attr = 0 # External file attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000374 # Other attributes are set by class ZipFile:
Tim Peterse1190062001-01-15 03:34:38 +0000375 # header_offset Byte offset to the file header
Tim Peterse1190062001-01-15 03:34:38 +0000376 # CRC CRC-32 of the uncompressed file
377 # compress_size Size of the compressed file
378 # file_size Size of the uncompressed file
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000379
Serhiy Storchaka51a43702014-10-29 22:42:06 +0200380 def __repr__(self):
381 result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
382 if self.compress_type != ZIP_STORED:
383 result.append(' compress_type=%s' %
384 compressor_names.get(self.compress_type,
385 self.compress_type))
386 hi = self.external_attr >> 16
387 lo = self.external_attr & 0xFFFF
388 if hi:
389 result.append(' filemode=%r' % stat.filemode(hi))
390 if lo:
391 result.append(' external_attr=%#x' % lo)
Serhiy Storchaka503f9082016-02-08 00:02:25 +0200392 isdir = self.is_dir()
Serhiy Storchaka51a43702014-10-29 22:42:06 +0200393 if not isdir or self.file_size:
394 result.append(' file_size=%r' % self.file_size)
395 if ((not isdir or self.compress_size) and
396 (self.compress_type != ZIP_STORED or
397 self.file_size != self.compress_size)):
398 result.append(' compress_size=%r' % self.compress_size)
399 result.append('>')
400 return ''.join(result)
401
Serhiy Storchaka182d7cd2013-01-15 00:31:39 +0200402 def FileHeader(self, zip64=None):
Fred Drake484d7352000-10-02 21:14:52 +0000403 """Return the per-file header as a string."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000404 dt = self.date_time
405 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +0000406 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000407 if self.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +0000408 # Set these to zero because we write them after the file data
409 CRC = compress_size = file_size = 0
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000410 else:
Tim Peterse1190062001-01-15 03:34:38 +0000411 CRC = self.CRC
412 compress_size = self.compress_size
413 file_size = self.file_size
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000414
415 extra = self.extra
416
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200417 min_version = 0
Serhiy Storchaka182d7cd2013-01-15 00:31:39 +0200418 if zip64 is None:
419 zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
420 if zip64:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000421 fmt = '<HHQQ'
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000422 extra = extra + struct.pack(fmt,
Christian Tismer59202e52013-10-21 03:59:23 +0200423 1, struct.calcsize(fmt)-4, file_size, compress_size)
Serhiy Storchaka182d7cd2013-01-15 00:31:39 +0200424 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
425 if not zip64:
426 raise LargeZipFile("Filesize would require ZIP64 extensions")
427 # File is larger than what fits into a 4 byte integer,
428 # fall back to the ZIP64 extension
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000429 file_size = 0xffffffff
430 compress_size = 0xffffffff
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200431 min_version = ZIP64_VERSION
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000432
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200433 if self.compress_type == ZIP_BZIP2:
434 min_version = max(BZIP2_VERSION, min_version)
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200435 elif self.compress_type == ZIP_LZMA:
436 min_version = max(LZMA_VERSION, min_version)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200437
438 self.extract_version = max(min_version, self.extract_version)
439 self.create_version = max(min_version, self.create_version)
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000440 filename, flag_bits = self._encodeFilenameFlags()
Georg Brandl2ee470f2008-07-16 12:55:28 +0000441 header = struct.pack(structFileHeader, stringFileHeader,
Christian Tismer59202e52013-10-21 03:59:23 +0200442 self.extract_version, self.reserved, flag_bits,
443 self.compress_type, dostime, dosdate, CRC,
444 compress_size, file_size,
445 len(filename), len(extra))
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000446 return header + filename + extra
447
448 def _encodeFilenameFlags(self):
449 try:
450 return self.filename.encode('ascii'), self.flag_bits
451 except UnicodeEncodeError:
452 return self.filename.encode('utf-8'), self.flag_bits | 0x800
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000453
454 def _decodeExtra(self):
455 # Try to decode the extra field.
456 extra = self.extra
457 unpack = struct.unpack
Gregory P. Smith0af8a862014-05-29 23:42:14 -0700458 while len(extra) >= 4:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000459 tp, ln = unpack('<HH', extra[:4])
Serhiy Storchakafeccdb22017-03-09 18:34:03 +0200460 if ln+4 > len(extra):
461 raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
462 if tp == 0x0001:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000463 if ln >= 24:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000464 counts = unpack('<QQQ', extra[4:28])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000465 elif ln == 16:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000466 counts = unpack('<QQ', extra[4:20])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000467 elif ln == 8:
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000468 counts = unpack('<Q', extra[4:12])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000469 elif ln == 0:
470 counts = ()
471 else:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300472 raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000473
474 idx = 0
475
476 # ZIP64 extension (large files and/or large archives)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +0000477 if self.file_size in (0xffffffffffffffff, 0xffffffff):
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000478 self.file_size = counts[idx]
479 idx += 1
480
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000481 if self.compress_size == 0xFFFFFFFF:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000482 self.compress_size = counts[idx]
483 idx += 1
484
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000485 if self.header_offset == 0xffffffff:
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000486 old = self.header_offset
487 self.header_offset = counts[idx]
488 idx+=1
489
490 extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000491
Serhiy Storchaka503f9082016-02-08 00:02:25 +0200492 @classmethod
493 def from_file(cls, filename, arcname=None):
494 """Construct an appropriate ZipInfo for a file on the filesystem.
495
496 filename should be the path to a file or directory on the filesystem.
497
498 arcname is the name which it will have within the archive (by default,
499 this will be the same as filename, but without a drive letter and with
500 leading path separators removed).
501 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +0200502 if isinstance(filename, os.PathLike):
503 filename = os.fspath(filename)
Serhiy Storchaka503f9082016-02-08 00:02:25 +0200504 st = os.stat(filename)
505 isdir = stat.S_ISDIR(st.st_mode)
506 mtime = time.localtime(st.st_mtime)
507 date_time = mtime[0:6]
508 # Create ZipInfo instance to store file information
509 if arcname is None:
510 arcname = filename
511 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
512 while arcname[0] in (os.sep, os.altsep):
513 arcname = arcname[1:]
514 if isdir:
515 arcname += '/'
516 zinfo = cls(arcname, date_time)
517 zinfo.external_attr = (st.st_mode & 0xFFFF) << 16 # Unix attributes
518 if isdir:
519 zinfo.file_size = 0
520 zinfo.external_attr |= 0x10 # MS-DOS directory flag
521 else:
522 zinfo.file_size = st.st_size
523
524 return zinfo
525
526 def is_dir(self):
Serhiy Storchakaf47fc552016-05-15 12:27:16 +0300527 """Return True if this archive member is a directory."""
Serhiy Storchaka503f9082016-02-08 00:02:25 +0200528 return self.filename[-1] == '/'
529
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000530
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300531# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
532# internal keys. We noticed that a direct implementation is faster than
533# relying on binascii.crc32().
Thomas Wouterscf297e42007-02-23 15:07:44 +0000534
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300535_crctable = None
536def _gen_crc(crc):
537 for j in range(8):
538 if crc & 1:
539 crc = (crc >> 1) ^ 0xEDB88320
540 else:
541 crc >>= 1
542 return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000543
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300544# ZIP supports a password-based form of encryption. Even though known
545# plaintext attacks have been found against it, it is still useful
546# to be able to get data out of such a file.
547#
548# Usage:
549# zd = _ZipDecrypter(mypwd)
550# plain_bytes = zd(cypher_bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000551
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300552def _ZipDecrypter(pwd):
553 key0 = 305419896
554 key1 = 591751049
555 key2 = 878082192
Thomas Wouterscf297e42007-02-23 15:07:44 +0000556
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300557 global _crctable
558 if _crctable is None:
559 _crctable = list(map(_gen_crc, range(256)))
560 crctable = _crctable
Thomas Wouterscf297e42007-02-23 15:07:44 +0000561
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300562 def crc32(ch, crc):
Thomas Wouterscf297e42007-02-23 15:07:44 +0000563 """Compute the CRC32 primitive on one byte."""
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300564 return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000565
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300566 def update_keys(c):
567 nonlocal key0, key1, key2
568 key0 = crc32(c, key0)
569 key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
570 key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
571 key2 = crc32(key1 >> 24, key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000572
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300573 for p in pwd:
574 update_keys(p)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000575
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300576 def decrypter(data):
577 """Decrypt a bytes object."""
578 result = bytearray()
579 append = result.append
580 for c in data:
581 k = key2 | 2
582 c ^= ((k * (k^1)) >> 8) & 0xFF
583 update_keys(c)
584 append(c)
585 return bytes(result)
586
587 return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000588
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200589
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200590class LZMACompressor:
591
592 def __init__(self):
593 self._comp = None
594
595 def _init(self):
Nadeem Vawdaa425c3d2012-06-21 23:36:48 +0200596 props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200597 self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
Christian Tismer59202e52013-10-21 03:59:23 +0200598 lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200599 ])
600 return struct.pack('<BBH', 9, 4, len(props)) + props
601
602 def compress(self, data):
603 if self._comp is None:
604 return self._init() + self._comp.compress(data)
605 return self._comp.compress(data)
606
607 def flush(self):
608 if self._comp is None:
609 return self._init() + self._comp.flush()
610 return self._comp.flush()
611
612
613class LZMADecompressor:
614
615 def __init__(self):
616 self._decomp = None
617 self._unconsumed = b''
618 self.eof = False
619
620 def decompress(self, data):
621 if self._decomp is None:
622 self._unconsumed += data
623 if len(self._unconsumed) <= 4:
624 return b''
625 psize, = struct.unpack('<H', self._unconsumed[2:4])
626 if len(self._unconsumed) <= 4 + psize:
627 return b''
628
629 self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
Christian Tismer59202e52013-10-21 03:59:23 +0200630 lzma._decode_filter_properties(lzma.FILTER_LZMA1,
631 self._unconsumed[4:4 + psize])
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200632 ])
633 data = self._unconsumed[4 + psize:]
634 del self._unconsumed
635
636 result = self._decomp.decompress(data)
637 self.eof = self._decomp.eof
638 return result
639
640
641compressor_names = {
642 0: 'store',
643 1: 'shrink',
644 2: 'reduce',
645 3: 'reduce',
646 4: 'reduce',
647 5: 'reduce',
648 6: 'implode',
649 7: 'tokenize',
650 8: 'deflate',
651 9: 'deflate64',
652 10: 'implode',
653 12: 'bzip2',
654 14: 'lzma',
655 18: 'terse',
656 19: 'lz77',
657 97: 'wavpack',
658 98: 'ppmd',
659}
660
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200661def _check_compression(compression):
662 if compression == ZIP_STORED:
663 pass
664 elif compression == ZIP_DEFLATED:
665 if not zlib:
666 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +0200667 "Compression requires the (missing) zlib module")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200668 elif compression == ZIP_BZIP2:
669 if not bz2:
670 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +0200671 "Compression requires the (missing) bz2 module")
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200672 elif compression == ZIP_LZMA:
673 if not lzma:
674 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +0200675 "Compression requires the (missing) lzma module")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200676 else:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300677 raise NotImplementedError("That compression method is not supported")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200678
679
Bo Baylesce237c72018-01-29 23:54:07 -0600680def _get_compressor(compress_type, compresslevel=None):
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200681 if compress_type == ZIP_DEFLATED:
Bo Baylesce237c72018-01-29 23:54:07 -0600682 if compresslevel is not None:
683 return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
684 return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200685 elif compress_type == ZIP_BZIP2:
Bo Baylesce237c72018-01-29 23:54:07 -0600686 if compresslevel is not None:
687 return bz2.BZ2Compressor(compresslevel)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200688 return bz2.BZ2Compressor()
Bo Baylesce237c72018-01-29 23:54:07 -0600689 # compresslevel is ignored for ZIP_LZMA
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200690 elif compress_type == ZIP_LZMA:
691 return LZMACompressor()
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200692 else:
693 return None
694
695
696def _get_decompressor(compress_type):
Martin v. Löwisb3260f02012-05-01 08:38:01 +0200697 if compress_type == ZIP_STORED:
698 return None
699 elif compress_type == ZIP_DEFLATED:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200700 return zlib.decompressobj(-15)
701 elif compress_type == ZIP_BZIP2:
702 return bz2.BZ2Decompressor()
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200703 elif compress_type == ZIP_LZMA:
704 return LZMADecompressor()
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200705 else:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200706 descr = compressor_names.get(compress_type)
Martin v. Löwisb3260f02012-05-01 08:38:01 +0200707 if descr:
708 raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
709 else:
710 raise NotImplementedError("compression type %d" % (compress_type,))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200711
712
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200713class _SharedFile:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300714 def __init__(self, file, pos, close, lock, writing):
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200715 self._file = file
716 self._pos = pos
717 self._close = close
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200718 self._lock = lock
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300719 self._writing = writing
John Jolly066df4f2018-01-30 01:51:35 -0700720 self.seekable = file.seekable
721 self.tell = file.tell
722
723 def seek(self, offset, whence=0):
724 with self._lock:
Miss Islington (bot)ad4f64d2018-07-29 12:57:21 -0700725 if self._writing():
John Jolly066df4f2018-01-30 01:51:35 -0700726 raise ValueError("Can't reposition in the ZIP file while "
727 "there is an open writing handle on it. "
728 "Close the writing handle before trying to read.")
Miss Islington (bot)ad4f64d2018-07-29 12:57:21 -0700729 self._file.seek(offset, whence)
John Jolly066df4f2018-01-30 01:51:35 -0700730 self._pos = self._file.tell()
731 return self._pos
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200732
733 def read(self, n=-1):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200734 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300735 if self._writing():
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300736 raise ValueError("Can't read from the ZIP file while there "
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300737 "is an open writing handle on it. "
738 "Close the writing handle before trying to read.")
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200739 self._file.seek(self._pos)
740 data = self._file.read(n)
741 self._pos = self._file.tell()
742 return data
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200743
744 def close(self):
745 if self._file is not None:
746 fileobj = self._file
747 self._file = None
748 self._close(fileobj)
749
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200750# Provide the tell method for unseekable stream
751class _Tellable:
752 def __init__(self, fp):
753 self.fp = fp
754 self.offset = 0
755
756 def write(self, data):
757 n = self.fp.write(data)
758 self.offset += n
759 return n
760
761 def tell(self):
762 return self.offset
763
764 def flush(self):
765 self.fp.flush()
766
767 def close(self):
768 self.fp.close()
769
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200770
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000771class ZipExtFile(io.BufferedIOBase):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000772 """File-like object for reading an archive member.
773 Is returned by ZipFile.open().
774 """
775
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000776 # Max size supported by decompressor.
777 MAX_N = 1 << 31 - 1
Guido van Rossumd8faa362007-04-27 19:54:29 +0000778
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000779 # Read from compressed files in 4k blocks.
780 MIN_READ_SIZE = 4096
Guido van Rossumd8faa362007-04-27 19:54:29 +0000781
John Jolly066df4f2018-01-30 01:51:35 -0700782 # Chunk size to read during seek
783 MAX_SEEK_READ = 1 << 24
784
Łukasz Langae94980a2010-11-22 23:31:26 +0000785 def __init__(self, fileobj, mode, zipinfo, decrypter=None,
786 close_fileobj=False):
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000787 self._fileobj = fileobj
788 self._decrypter = decrypter
Łukasz Langae94980a2010-11-22 23:31:26 +0000789 self._close_fileobj = close_fileobj
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000790
Ezio Melotti92b47432010-01-28 01:44:41 +0000791 self._compress_type = zipinfo.compress_type
Ezio Melotti92b47432010-01-28 01:44:41 +0000792 self._compress_left = zipinfo.compress_size
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200793 self._left = zipinfo.file_size
Ezio Melotti92b47432010-01-28 01:44:41 +0000794
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200795 self._decompressor = _get_decompressor(self._compress_type)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000796
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200797 self._eof = False
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000798 self._readbuffer = b''
799 self._offset = 0
800
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000801 self.newlines = None
802
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000803 # Adjust read size for encrypted files since the first 12 bytes
804 # are for the encryption/password information.
805 if self._decrypter is not None:
806 self._compress_left -= 12
807
808 self.mode = mode
Guido van Rossumd8faa362007-04-27 19:54:29 +0000809 self.name = zipinfo.filename
810
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000811 if hasattr(zipinfo, 'CRC'):
812 self._expected_crc = zipinfo.CRC
Martin Panterb82032f2015-12-11 05:19:29 +0000813 self._running_crc = crc32(b'')
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000814 else:
815 self._expected_crc = None
816
John Jolly066df4f2018-01-30 01:51:35 -0700817 self._seekable = False
818 try:
819 if fileobj.seekable():
820 self._orig_compress_start = fileobj.tell()
821 self._orig_compress_size = zipinfo.compress_size
822 self._orig_file_size = zipinfo.file_size
823 self._orig_start_crc = self._running_crc
824 self._seekable = True
825 except AttributeError:
826 pass
827
Serhiy Storchaka51a43702014-10-29 22:42:06 +0200828 def __repr__(self):
829 result = ['<%s.%s' % (self.__class__.__module__,
830 self.__class__.__qualname__)]
831 if not self.closed:
832 result.append(' name=%r mode=%r' % (self.name, self.mode))
833 if self._compress_type != ZIP_STORED:
834 result.append(' compress_type=%s' %
835 compressor_names.get(self._compress_type,
836 self._compress_type))
837 else:
838 result.append(' [closed]')
839 result.append('>')
840 return ''.join(result)
841
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000842 def readline(self, limit=-1):
843 """Read and return a line from the stream.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000844
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000845 If limit is specified, at most limit bytes will be read.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000846 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000847
Serhiy Storchakae670be22016-06-11 19:32:44 +0300848 if limit < 0:
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000849 # Shortcut common case - newline found in buffer.
850 i = self._readbuffer.find(b'\n', self._offset) + 1
851 if i > 0:
852 line = self._readbuffer[self._offset: i]
853 self._offset = i
854 return line
Guido van Rossumd8faa362007-04-27 19:54:29 +0000855
Serhiy Storchakae670be22016-06-11 19:32:44 +0300856 return io.BufferedIOBase.readline(self, limit)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000857
858 def peek(self, n=1):
859 """Returns buffered bytes without advancing the position."""
860 if n > len(self._readbuffer) - self._offset:
861 chunk = self.read(n)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200862 if len(chunk) > self._offset:
863 self._readbuffer = chunk + self._readbuffer[self._offset:]
864 self._offset = 0
865 else:
866 self._offset -= len(chunk)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000867
868 # Return up to 512 bytes to reduce allocation overhead for tight loops.
869 return self._readbuffer[self._offset: self._offset + 512]
870
871 def readable(self):
872 return True
873
874 def read(self, n=-1):
875 """Read and return up to n bytes.
876 If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
Guido van Rossumd8faa362007-04-27 19:54:29 +0000877 """
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200878 if n is None or n < 0:
879 buf = self._readbuffer[self._offset:]
880 self._readbuffer = b''
881 self._offset = 0
882 while not self._eof:
883 buf += self._read1(self.MAX_N)
884 return buf
Guido van Rossumd8faa362007-04-27 19:54:29 +0000885
Antoine Pitrou78157b32012-06-23 16:44:48 +0200886 end = n + self._offset
887 if end < len(self._readbuffer):
888 buf = self._readbuffer[self._offset:end]
889 self._offset = end
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200890 return buf
891
Antoine Pitrou78157b32012-06-23 16:44:48 +0200892 n = end - len(self._readbuffer)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200893 buf = self._readbuffer[self._offset:]
894 self._readbuffer = b''
895 self._offset = 0
896 while n > 0 and not self._eof:
897 data = self._read1(n)
898 if n < len(data):
899 self._readbuffer = data
900 self._offset = n
901 buf += data[:n]
902 break
903 buf += data
904 n -= len(data)
905 return buf
906
907 def _update_crc(self, newdata):
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000908 # Update the CRC using the given data.
909 if self._expected_crc is None:
910 # No need to compute the CRC if we don't have a reference value
911 return
Martin Panterb82032f2015-12-11 05:19:29 +0000912 self._running_crc = crc32(newdata, self._running_crc)
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000913 # Check the CRC if we're at the end of the file
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200914 if self._eof and self._running_crc != self._expected_crc:
Georg Brandl4d540882010-10-28 06:42:33 +0000915 raise BadZipFile("Bad CRC-32 for file %r" % self.name)
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000916
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000917 def read1(self, n):
918 """Read up to n bytes with at most one read() system call."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000919
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200920 if n is None or n < 0:
921 buf = self._readbuffer[self._offset:]
922 self._readbuffer = b''
923 self._offset = 0
Serhiy Storchakad2c07a52013-09-27 22:11:57 +0300924 while not self._eof:
925 data = self._read1(self.MAX_N)
926 if data:
927 buf += data
928 break
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200929 return buf
Guido van Rossumd8faa362007-04-27 19:54:29 +0000930
Antoine Pitrou78157b32012-06-23 16:44:48 +0200931 end = n + self._offset
932 if end < len(self._readbuffer):
933 buf = self._readbuffer[self._offset:end]
934 self._offset = end
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200935 return buf
936
Antoine Pitrou78157b32012-06-23 16:44:48 +0200937 n = end - len(self._readbuffer)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200938 buf = self._readbuffer[self._offset:]
939 self._readbuffer = b''
940 self._offset = 0
941 if n > 0:
Serhiy Storchakad2c07a52013-09-27 22:11:57 +0300942 while not self._eof:
943 data = self._read1(n)
944 if n < len(data):
945 self._readbuffer = data
946 self._offset = n
947 buf += data[:n]
948 break
949 if data:
950 buf += data
951 break
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200952 return buf
953
954 def _read1(self, n):
955 # Read up to n compressed bytes with at most one read() system call,
956 # decrypt and decompress them.
957 if self._eof or n <= 0:
958 return b''
Guido van Rossumd8faa362007-04-27 19:54:29 +0000959
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000960 # Read from file.
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200961 if self._compress_type == ZIP_DEFLATED:
962 ## Handle unconsumed data.
963 data = self._decompressor.unconsumed_tail
964 if n > len(data):
965 data += self._read2(n - len(data))
966 else:
967 data = self._read2(n)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000968
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200969 if self._compress_type == ZIP_STORED:
970 self._eof = self._compress_left <= 0
971 elif self._compress_type == ZIP_DEFLATED:
972 n = max(n, self.MIN_READ_SIZE)
973 data = self._decompressor.decompress(data, n)
974 self._eof = (self._decompressor.eof or
Christian Tismer59202e52013-10-21 03:59:23 +0200975 self._compress_left <= 0 and
976 not self._decompressor.unconsumed_tail)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200977 if self._eof:
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000978 data += self._decompressor.flush()
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200979 else:
980 data = self._decompressor.decompress(data)
981 self._eof = self._decompressor.eof or self._compress_left <= 0
Guido van Rossumd8faa362007-04-27 19:54:29 +0000982
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200983 data = data[:self._left]
984 self._left -= len(data)
985 if self._left <= 0:
986 self._eof = True
987 self._update_crc(data)
988 return data
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000989
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200990 def _read2(self, n):
991 if self._compress_left <= 0:
992 return b''
993
994 n = max(n, self.MIN_READ_SIZE)
995 n = min(n, self._compress_left)
996
997 data = self._fileobj.read(n)
998 self._compress_left -= len(data)
Serhiy Storchaka5ce3f102014-01-09 14:50:20 +0200999 if not data:
1000 raise EOFError
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001001
1002 if self._decrypter is not None:
Serhiy Storchaka06e52252017-03-30 19:09:08 +03001003 data = self._decrypter(data)
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001004 return data
Guido van Rossumd8faa362007-04-27 19:54:29 +00001005
Łukasz Langae94980a2010-11-22 23:31:26 +00001006 def close(self):
1007 try:
1008 if self._close_fileobj:
1009 self._fileobj.close()
1010 finally:
1011 super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001012
John Jolly066df4f2018-01-30 01:51:35 -07001013 def seekable(self):
1014 return self._seekable
1015
1016 def seek(self, offset, whence=0):
1017 if not self._seekable:
1018 raise io.UnsupportedOperation("underlying stream is not seekable")
1019 curr_pos = self.tell()
1020 if whence == 0: # Seek from start of file
1021 new_pos = offset
1022 elif whence == 1: # Seek from current position
1023 new_pos = curr_pos + offset
1024 elif whence == 2: # Seek from EOF
1025 new_pos = self._orig_file_size + offset
1026 else:
1027 raise ValueError("whence must be os.SEEK_SET (0), "
1028 "os.SEEK_CUR (1), or os.SEEK_END (2)")
1029
1030 if new_pos > self._orig_file_size:
1031 new_pos = self._orig_file_size
1032
1033 if new_pos < 0:
1034 new_pos = 0
1035
1036 read_offset = new_pos - curr_pos
1037 buff_offset = read_offset + self._offset
1038
1039 if buff_offset >= 0 and buff_offset < len(self._readbuffer):
1040 # Just move the _offset index if the new position is in the _readbuffer
1041 self._offset = buff_offset
1042 read_offset = 0
1043 elif read_offset < 0:
1044 # Position is before the current position. Reset the ZipExtFile
John Jolly066df4f2018-01-30 01:51:35 -07001045 self._fileobj.seek(self._orig_compress_start)
1046 self._running_crc = self._orig_start_crc
1047 self._compress_left = self._orig_compress_size
1048 self._left = self._orig_file_size
1049 self._readbuffer = b''
1050 self._offset = 0
Miss Islington (bot)ad4f64d2018-07-29 12:57:21 -07001051 self._decompressor = _get_decompressor(self._compress_type)
John Jolly066df4f2018-01-30 01:51:35 -07001052 self._eof = False
1053 read_offset = new_pos
1054
1055 while read_offset > 0:
1056 read_len = min(self.MAX_SEEK_READ, read_offset)
1057 self.read(read_len)
1058 read_offset -= read_len
1059
1060 return self.tell()
1061
1062 def tell(self):
1063 if not self._seekable:
1064 raise io.UnsupportedOperation("underlying stream is not seekable")
1065 filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
1066 return filepos
1067
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001068
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001069class _ZipWriteFile(io.BufferedIOBase):
1070 def __init__(self, zf, zinfo, zip64):
1071 self._zinfo = zinfo
1072 self._zip64 = zip64
1073 self._zipfile = zf
Bo Baylesce237c72018-01-29 23:54:07 -06001074 self._compressor = _get_compressor(zinfo.compress_type,
1075 zinfo._compresslevel)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001076 self._file_size = 0
1077 self._compress_size = 0
1078 self._crc = 0
1079
1080 @property
1081 def _fileobj(self):
1082 return self._zipfile.fp
1083
1084 def writable(self):
1085 return True
1086
1087 def write(self, data):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001088 if self.closed:
1089 raise ValueError('I/O operation on closed file.')
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001090 nbytes = len(data)
1091 self._file_size += nbytes
1092 self._crc = crc32(data, self._crc)
1093 if self._compressor:
1094 data = self._compressor.compress(data)
1095 self._compress_size += len(data)
1096 self._fileobj.write(data)
1097 return nbytes
1098
1099 def close(self):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001100 if self.closed:
1101 return
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001102 super().close()
1103 # Flush any data from the compressor, and update header info
1104 if self._compressor:
1105 buf = self._compressor.flush()
1106 self._compress_size += len(buf)
1107 self._fileobj.write(buf)
1108 self._zinfo.compress_size = self._compress_size
1109 else:
1110 self._zinfo.compress_size = self._file_size
1111 self._zinfo.CRC = self._crc
1112 self._zinfo.file_size = self._file_size
1113
1114 # Write updated header info
1115 if self._zinfo.flag_bits & 0x08:
1116 # Write CRC and file sizes after the file data
1117 fmt = '<LQQ' if self._zip64 else '<LLL'
1118 self._fileobj.write(struct.pack(fmt, self._zinfo.CRC,
1119 self._zinfo.compress_size, self._zinfo.file_size))
1120 self._zipfile.start_dir = self._fileobj.tell()
1121 else:
1122 if not self._zip64:
1123 if self._file_size > ZIP64_LIMIT:
1124 raise RuntimeError('File size unexpectedly exceeded ZIP64 '
1125 'limit')
1126 if self._compress_size > ZIP64_LIMIT:
1127 raise RuntimeError('Compressed size unexpectedly exceeded '
1128 'ZIP64 limit')
1129 # Seek backwards and write file header (which will now include
1130 # correct CRC and file sizes)
1131
1132 # Preserve current position in file
1133 self._zipfile.start_dir = self._fileobj.tell()
1134 self._fileobj.seek(self._zinfo.header_offset)
1135 self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1136 self._fileobj.seek(self._zipfile.start_dir)
1137
1138 self._zipfile._writing = False
1139
1140 # Successfully written: Add file to our caches
1141 self._zipfile.filelist.append(self._zinfo)
1142 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1143
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001144class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001145 """ Class with methods to open, read, write, close, list zip files.
1146
Bo Baylesce237c72018-01-29 23:54:07 -06001147 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1148 compresslevel=None)
Tim Petersa19a1682001-03-29 04:36:09 +00001149
Fred Drake3d9091e2001-03-26 15:49:24 +00001150 file: Either the path to the file, or a file-like object.
1151 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001152 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1153 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001154 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1155 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001156 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1157 needed, otherwise it will raise an exception when this would
1158 be necessary.
Bo Baylesce237c72018-01-29 23:54:07 -06001159 compresslevel: None (default for the given compression type) or an integer
1160 specifying the level to pass to the compressor.
1161 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1162 When using ZIP_DEFLATED integers 0 through 9 are accepted.
1163 When using ZIP_BZIP2 integers 1 through 9 are accepted.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001164
Fred Drake3d9091e2001-03-26 15:49:24 +00001165 """
Fred Drake484d7352000-10-02 21:14:52 +00001166
Fred Drake90eac282001-02-28 05:29:34 +00001167 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001168 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001169
Bo Baylesce237c72018-01-29 23:54:07 -06001170 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
1171 compresslevel=None):
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001172 """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
1173 or append 'a'."""
1174 if mode not in ('r', 'w', 'x', 'a'):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001175 raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001176
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001177 _check_compression(compression)
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001178
1179 self._allowZip64 = allowZip64
1180 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +00001181 self.debug = 0 # Level of printing: 0 through 3
1182 self.NameToInfo = {} # Find file info given name
1183 self.filelist = [] # List of ZipInfo instances for archive
1184 self.compression = compression # Method of compression
Bo Baylesce237c72018-01-29 23:54:07 -06001185 self.compresslevel = compresslevel
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001186 self.mode = mode
Thomas Wouterscf297e42007-02-23 15:07:44 +00001187 self.pwd = None
R David Murrayf50b38a2012-04-12 18:44:58 -04001188 self._comment = b''
Tim Petersa19a1682001-03-29 04:36:09 +00001189
Fred Drake3d9091e2001-03-26 15:49:24 +00001190 # Check if we were passed a file-like object
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001191 if isinstance(file, os.PathLike):
1192 file = os.fspath(file)
Guido van Rossum3172c5d2007-10-16 18:12:55 +00001193 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001194 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +00001195 self._filePassed = 0
1196 self.filename = file
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001197 modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
1198 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001199 filemode = modeDict[mode]
1200 while True:
1201 try:
1202 self.fp = io.open(file, filemode)
1203 except OSError:
1204 if filemode in modeDict:
1205 filemode = modeDict[filemode]
1206 continue
Thomas Wouterscf297e42007-02-23 15:07:44 +00001207 raise
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001208 break
Fred Drake3d9091e2001-03-26 15:49:24 +00001209 else:
1210 self._filePassed = 1
1211 self.fp = file
1212 self.filename = getattr(file, 'name', None)
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001213 self._fileRefCnt = 1
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001214 self._lock = threading.RLock()
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001215 self._seekable = True
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001216 self._writing = False
Tim Petersa19a1682001-03-29 04:36:09 +00001217
Antoine Pitrou17babc52012-11-17 23:50:08 +01001218 try:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001219 if mode == 'r':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001220 self._RealGetContents()
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001221 elif mode in ('w', 'x'):
Georg Brandl268e4d42010-10-14 06:59:45 +00001222 # set the modified flag so central directory gets written
1223 # even if no files are added to the archive
1224 self._didModify = True
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001225 try:
Serhiy Storchaka34cba332017-01-01 19:00:30 +02001226 self.start_dir = self.fp.tell()
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001227 except (AttributeError, OSError):
1228 self.fp = _Tellable(self.fp)
Serhiy Storchaka34cba332017-01-01 19:00:30 +02001229 self.start_dir = 0
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001230 self._seekable = False
1231 else:
1232 # Some file-like objects can provide tell() but not seek()
1233 try:
1234 self.fp.seek(self.start_dir)
1235 except (AttributeError, OSError):
1236 self._seekable = False
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001237 elif mode == 'a':
Antoine Pitrou17babc52012-11-17 23:50:08 +01001238 try:
1239 # See if file is a zip file
1240 self._RealGetContents()
1241 # seek to start of directory and overwrite
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001242 self.fp.seek(self.start_dir)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001243 except BadZipFile:
1244 # file is not a zip file, just append
1245 self.fp.seek(0, 2)
1246
1247 # set the modified flag so central directory gets written
1248 # even if no files are added to the archive
1249 self._didModify = True
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001250 self.start_dir = self.fp.tell()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001251 else:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001252 raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
Antoine Pitrou17babc52012-11-17 23:50:08 +01001253 except:
1254 fp = self.fp
1255 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001256 self._fpclose(fp)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001257 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001258
Ezio Melottifaa6b7f2009-12-30 12:34:59 +00001259 def __enter__(self):
1260 return self
1261
1262 def __exit__(self, type, value, traceback):
1263 self.close()
1264
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001265 def __repr__(self):
1266 result = ['<%s.%s' % (self.__class__.__module__,
1267 self.__class__.__qualname__)]
1268 if self.fp is not None:
1269 if self._filePassed:
1270 result.append(' file=%r' % self.fp)
1271 elif self.filename is not None:
1272 result.append(' filename=%r' % self.filename)
1273 result.append(' mode=%r' % self.mode)
1274 else:
1275 result.append(' [closed]')
1276 result.append('>')
1277 return ''.join(result)
1278
Tim Peters7d3bad62001-04-04 18:56:49 +00001279 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +00001280 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001281 fp = self.fp
Georg Brandl268e4d42010-10-14 06:59:45 +00001282 try:
1283 endrec = _EndRecData(fp)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001284 except OSError:
Georg Brandl4d540882010-10-28 06:42:33 +00001285 raise BadZipFile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001286 if not endrec:
Georg Brandl4d540882010-10-28 06:42:33 +00001287 raise BadZipFile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001288 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001289 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001290 size_cd = endrec[_ECD_SIZE] # bytes in central directory
1291 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
R David Murrayf50b38a2012-04-12 18:44:58 -04001292 self._comment = endrec[_ECD_COMMENT] # archive comment
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001293
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001294 # "concat" is zero, unless zip was concatenated to another file
1295 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +00001296 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1297 # If Zip64 extension structures are present, account for them
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001298 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001299
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001300 if self.debug > 2:
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001301 inferred = concat + offset_cd
1302 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001303 # self.start_dir: Position of start of central directory
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001304 self.start_dir = offset_cd + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001305 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001306 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001307 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001308 total = 0
1309 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001310 centdir = fp.read(sizeCentralDir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001311 if len(centdir) != sizeCentralDir:
1312 raise BadZipFile("Truncated central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001313 centdir = struct.unpack(structCentralDir, centdir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001314 if centdir[_CD_SIGNATURE] != stringCentralDir:
1315 raise BadZipFile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001316 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001317 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +00001318 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001319 flags = centdir[5]
1320 if flags & 0x800:
1321 # UTF-8 file names extension
1322 filename = filename.decode('utf-8')
1323 else:
1324 # Historical ZIP filename encoding
1325 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001326 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001327 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +00001328 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1329 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001330 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001331 (x.create_version, x.create_system, x.extract_version, x.reserved,
Christian Tismer59202e52013-10-21 03:59:23 +02001332 x.flag_bits, x.compress_type, t, d,
1333 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
Martin v. Löwisd099b562012-05-01 14:08:22 +02001334 if x.extract_version > MAX_EXTRACT_VERSION:
1335 raise NotImplementedError("zip file version %.1f" %
1336 (x.extract_version / 10))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001337 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1338 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +00001339 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001340 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Christian Tismer59202e52013-10-21 03:59:23 +02001341 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001342
1343 x._decodeExtra()
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001344 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001345 self.filelist.append(x)
1346 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001347
1348 # update total bytes read from central directory
1349 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1350 + centdir[_CD_EXTRA_FIELD_LENGTH]
1351 + centdir[_CD_COMMENT_LENGTH])
1352
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001353 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001354 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001355
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001356
1357 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001358 """Return a list of file names in the archive."""
Ezio Melotti006917e2012-04-16 21:34:24 -06001359 return [data.filename for data in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001360
1361 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001362 """Return a list of class ZipInfo instances for files in the
1363 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001364 return self.filelist
1365
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001366 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +00001367 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001368 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
1369 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001370 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001371 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001372 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1373 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001374
1375 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001376 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001377 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001378 for zinfo in self.filelist:
1379 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001380 # Read by chunks, to avoid an OverflowError or a
1381 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001382 with self.open(zinfo.filename, "r") as f:
1383 while f.read(chunk_size): # Check CRC-32
1384 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001385 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001386 return zinfo.filename
1387
1388 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +00001389 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001390 info = self.NameToInfo.get(name)
1391 if info is None:
1392 raise KeyError(
1393 'There is no item named %r in the archive' % name)
1394
1395 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001396
Thomas Wouterscf297e42007-02-23 15:07:44 +00001397 def setpassword(self, pwd):
1398 """Set default password for encrypted files."""
R. David Murray8d855d82010-12-21 21:53:37 +00001399 if pwd and not isinstance(pwd, bytes):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001400 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
R. David Murray8d855d82010-12-21 21:53:37 +00001401 if pwd:
1402 self.pwd = pwd
1403 else:
1404 self.pwd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001405
R David Murrayf50b38a2012-04-12 18:44:58 -04001406 @property
1407 def comment(self):
1408 """The comment text associated with the ZIP file."""
1409 return self._comment
1410
1411 @comment.setter
1412 def comment(self, comment):
1413 if not isinstance(comment, bytes):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001414 raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
R David Murrayf50b38a2012-04-12 18:44:58 -04001415 # check for valid comment length
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001416 if len(comment) > ZIP_MAX_COMMENT:
1417 import warnings
1418 warnings.warn('Archive comment is too long; truncating to %d bytes'
1419 % ZIP_MAX_COMMENT, stacklevel=2)
R David Murrayf50b38a2012-04-12 18:44:58 -04001420 comment = comment[:ZIP_MAX_COMMENT]
1421 self._comment = comment
1422 self._didModify = True
1423
Thomas Wouterscf297e42007-02-23 15:07:44 +00001424 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +00001425 """Return file bytes (as a string) for name."""
Benjamin Petersond285bdb2010-10-31 17:57:22 +00001426 with self.open(name, "r", pwd) as fp:
1427 return fp.read()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001428
Serhiy Storchakaf47fc552016-05-15 12:27:16 +03001429 def open(self, name, mode="r", pwd=None, *, force_zip64=False):
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001430 """Return file-like object for 'name'.
1431
1432 name is a string for the file name within the ZIP file, or a ZipInfo
1433 object.
1434
1435 mode should be 'r' to read a file already in the ZIP file, or 'w' to
1436 write to a file newly added to the archive.
1437
1438 pwd is the password to decrypt files (only used for reading).
1439
1440 When writing, if the file size is not known in advance but may exceed
1441 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
1442 files. If the size is known in advance, it is best to pass a ZipInfo
1443 instance for name, with zinfo.file_size set.
1444 """
Serhiy Storchakae670be22016-06-11 19:32:44 +03001445 if mode not in {"r", "w"}:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001446 raise ValueError('open() requires mode "r" or "w"')
R. David Murray8d855d82010-12-21 21:53:37 +00001447 if pwd and not isinstance(pwd, bytes):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001448 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001449 if pwd and (mode == "w"):
1450 raise ValueError("pwd is only supported for reading files")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001451 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001452 raise ValueError(
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001453 "Attempt to use ZIP archive that was already closed")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001454
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001455 # Make sure we have an info object
1456 if isinstance(name, ZipInfo):
1457 # 'name' is already an info object
1458 zinfo = name
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001459 elif mode == 'w':
1460 zinfo = ZipInfo(name)
1461 zinfo.compress_type = self.compression
Bo Baylesce237c72018-01-29 23:54:07 -06001462 zinfo._compresslevel = self.compresslevel
Guido van Rossumd8faa362007-04-27 19:54:29 +00001463 else:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001464 # Get info object for name
1465 zinfo = self.getinfo(name)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001466
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001467 if mode == 'w':
1468 return self._open_to_write(zinfo, force_zip64=force_zip64)
1469
1470 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001471 raise ValueError("Can't read from the ZIP file while there "
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001472 "is an open writing handle on it. "
1473 "Close the writing handle before trying to read.")
1474
1475 # Open for reading:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001476 self._fileRefCnt += 1
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001477 zef_file = _SharedFile(self.fp, zinfo.header_offset,
1478 self._fpclose, self._lock, lambda: self._writing)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001479 try:
Antoine Pitrou17babc52012-11-17 23:50:08 +01001480 # Skip the file header:
1481 fheader = zef_file.read(sizeFileHeader)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001482 if len(fheader) != sizeFileHeader:
1483 raise BadZipFile("Truncated file header")
1484 fheader = struct.unpack(structFileHeader, fheader)
1485 if fheader[_FH_SIGNATURE] != stringFileHeader:
Antoine Pitrou17babc52012-11-17 23:50:08 +01001486 raise BadZipFile("Bad magic number for file header")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001487
Antoine Pitrou17babc52012-11-17 23:50:08 +01001488 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
1489 if fheader[_FH_EXTRA_FIELD_LENGTH]:
1490 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001491
Antoine Pitrou8572da52012-11-17 23:52:05 +01001492 if zinfo.flag_bits & 0x20:
1493 # Zip 2.7: compressed patched data
1494 raise NotImplementedError("compressed patched data (flag bit 5)")
Martin v. Löwis2a2ce322012-05-01 08:44:08 +02001495
Antoine Pitrou8572da52012-11-17 23:52:05 +01001496 if zinfo.flag_bits & 0x40:
1497 # strong encryption
1498 raise NotImplementedError("strong encryption (flag bit 6)")
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001499
Antoine Pitrou17babc52012-11-17 23:50:08 +01001500 if zinfo.flag_bits & 0x800:
1501 # UTF-8 filename
1502 fname_str = fname.decode("utf-8")
1503 else:
1504 fname_str = fname.decode("cp437")
Georg Brandl5ba11de2011-01-01 10:09:32 +00001505
Antoine Pitrou17babc52012-11-17 23:50:08 +01001506 if fname_str != zinfo.orig_filename:
1507 raise BadZipFile(
1508 'File name in directory %r and header %r differ.'
1509 % (zinfo.orig_filename, fname))
1510
1511 # check for encrypted flag & handle password
1512 is_encrypted = zinfo.flag_bits & 0x1
1513 zd = None
1514 if is_encrypted:
1515 if not pwd:
1516 pwd = self.pwd
1517 if not pwd:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001518 raise RuntimeError("File %r is encrypted, password "
Antoine Pitrou17babc52012-11-17 23:50:08 +01001519 "required for extraction" % name)
1520
1521 zd = _ZipDecrypter(pwd)
1522 # The first 12 bytes in the cypher stream is an encryption header
1523 # used to strengthen the algorithm. The first 11 bytes are
1524 # completely random, while the 12th contains the MSB of the CRC,
1525 # or the MSB of the file time depending on the header type
1526 # and is used to check the correctness of the password.
1527 header = zef_file.read(12)
Serhiy Storchaka06e52252017-03-30 19:09:08 +03001528 h = zd(header[0:12])
Antoine Pitrou17babc52012-11-17 23:50:08 +01001529 if zinfo.flag_bits & 0x8:
1530 # compare against the file type from extended local headers
1531 check_byte = (zinfo._raw_time >> 8) & 0xff
1532 else:
1533 # compare against the CRC otherwise
1534 check_byte = (zinfo.CRC >> 24) & 0xff
1535 if h[11] != check_byte:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001536 raise RuntimeError("Bad password for file %r" % name)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001537
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001538 return ZipExtFile(zef_file, mode, zinfo, zd, True)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001539 except:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001540 zef_file.close()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001541 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001542
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001543 def _open_to_write(self, zinfo, force_zip64=False):
1544 if force_zip64 and not self._allowZip64:
1545 raise ValueError(
1546 "force_zip64 is True, but allowZip64 was False when opening "
1547 "the ZIP file."
1548 )
1549 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001550 raise ValueError("Can't write to the ZIP file while there is "
1551 "another write handle open on it. "
1552 "Close the first handle before opening another.")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001553
1554 # Sizes and CRC are overwritten with correct data after processing the file
1555 if not hasattr(zinfo, 'file_size'):
1556 zinfo.file_size = 0
1557 zinfo.compress_size = 0
1558 zinfo.CRC = 0
1559
1560 zinfo.flag_bits = 0x00
1561 if zinfo.compress_type == ZIP_LZMA:
1562 # Compressed data includes an end-of-stream (EOS) marker
1563 zinfo.flag_bits |= 0x02
1564 if not self._seekable:
1565 zinfo.flag_bits |= 0x08
1566
1567 if not zinfo.external_attr:
1568 zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
1569
1570 # Compressed size can be larger than uncompressed size
1571 zip64 = self._allowZip64 and \
1572 (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1573
1574 if self._seekable:
1575 self.fp.seek(self.start_dir)
1576 zinfo.header_offset = self.fp.tell()
1577
1578 self._writecheck(zinfo)
1579 self._didModify = True
1580
1581 self.fp.write(zinfo.FileHeader(zip64))
1582
1583 self._writing = True
1584 return _ZipWriteFile(self, zinfo, zip64)
1585
Christian Heimes790c8232008-01-07 21:14:23 +00001586 def extract(self, member, path=None, pwd=None):
1587 """Extract a member from the archive to the current working directory,
1588 using its full name. Its file information is extracted as accurately
1589 as possible. `member' may be a filename or a ZipInfo object. You can
1590 specify a different directory using `path'.
1591 """
Christian Heimes790c8232008-01-07 21:14:23 +00001592 if path is None:
1593 path = os.getcwd()
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001594 else:
1595 path = os.fspath(path)
Christian Heimes790c8232008-01-07 21:14:23 +00001596
1597 return self._extract_member(member, path, pwd)
1598
1599 def extractall(self, path=None, members=None, pwd=None):
1600 """Extract all members from the archive to the current working
1601 directory. `path' specifies a different directory to extract to.
1602 `members' is optional and must be a subset of the list returned
1603 by namelist().
1604 """
1605 if members is None:
1606 members = self.namelist()
1607
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001608 if path is None:
1609 path = os.getcwd()
1610 else:
1611 path = os.fspath(path)
1612
Christian Heimes790c8232008-01-07 21:14:23 +00001613 for zipinfo in members:
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001614 self._extract_member(zipinfo, path, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001615
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001616 @classmethod
1617 def _sanitize_windows_name(cls, arcname, pathsep):
1618 """Replace bad characters and remove trailing dots from parts."""
1619 table = cls._windows_illegal_name_trans_table
1620 if not table:
1621 illegal = ':<>|"?*'
1622 table = str.maketrans(illegal, '_' * len(illegal))
1623 cls._windows_illegal_name_trans_table = table
1624 arcname = arcname.translate(table)
1625 # remove trailing dots
1626 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1627 # rejoin, removing empty parts.
1628 arcname = pathsep.join(x for x in arcname if x)
1629 return arcname
1630
Christian Heimes790c8232008-01-07 21:14:23 +00001631 def _extract_member(self, member, targetpath, pwd):
1632 """Extract the ZipInfo object 'member' to a physical
1633 file on the path targetpath.
1634 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001635 if not isinstance(member, ZipInfo):
1636 member = self.getinfo(member)
1637
Christian Heimes790c8232008-01-07 21:14:23 +00001638 # build the destination pathname, replacing
1639 # forward slashes to platform specific separators.
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001640 arcname = member.filename.replace('/', os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001641
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001642 if os.path.altsep:
1643 arcname = arcname.replace(os.path.altsep, os.path.sep)
1644 # interpret absolute pathname as relative, remove drive letter or
1645 # UNC path, redundant separators, "." and ".." components.
1646 arcname = os.path.splitdrive(arcname)[1]
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001647 invalid_path_parts = ('', os.path.curdir, os.path.pardir)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001648 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001649 if x not in invalid_path_parts)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001650 if os.path.sep == '\\':
Serhiy Storchakae5e64442013-02-02 19:50:59 +02001651 # filter illegal characters on Windows
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001652 arcname = self._sanitize_windows_name(arcname, os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001653
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001654 targetpath = os.path.join(targetpath, arcname)
Christian Heimes790c8232008-01-07 21:14:23 +00001655 targetpath = os.path.normpath(targetpath)
1656
1657 # Create all upper directories if necessary.
1658 upperdirs = os.path.dirname(targetpath)
1659 if upperdirs and not os.path.exists(upperdirs):
1660 os.makedirs(upperdirs)
1661
Serhiy Storchaka503f9082016-02-08 00:02:25 +02001662 if member.is_dir():
Martin v. Löwis70ccd162009-05-24 19:47:22 +00001663 if not os.path.isdir(targetpath):
1664 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001665 return targetpath
1666
Antoine Pitrou17babc52012-11-17 23:50:08 +01001667 with self.open(member, pwd=pwd) as source, \
1668 open(targetpath, "wb") as target:
1669 shutil.copyfileobj(source, target)
Christian Heimes790c8232008-01-07 21:14:23 +00001670
1671 return targetpath
1672
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001673 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001674 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001675 if zinfo.filename in self.NameToInfo:
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001676 import warnings
1677 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001678 if self.mode not in ('w', 'x', 'a'):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001679 raise ValueError("write() requires mode 'w', 'x', or 'a'")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001680 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001681 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001682 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001683 _check_compression(zinfo.compress_type)
Serhiy Storchakacfbb3942014-09-23 21:34:24 +03001684 if not self._allowZip64:
1685 requires_zip64 = None
1686 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1687 requires_zip64 = "Files count"
1688 elif zinfo.file_size > ZIP64_LIMIT:
1689 requires_zip64 = "Filesize"
1690 elif zinfo.header_offset > ZIP64_LIMIT:
1691 requires_zip64 = "Zipfile size"
1692 if requires_zip64:
1693 raise LargeZipFile(requires_zip64 +
1694 " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001695
Bo Baylesce237c72018-01-29 23:54:07 -06001696 def write(self, filename, arcname=None,
1697 compress_type=None, compresslevel=None):
Fred Drake484d7352000-10-02 21:14:52 +00001698 """Put the bytes from filename into the archive under the name
1699 arcname."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001700 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001701 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001702 "Attempt to write to ZIP archive that was already closed")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001703 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001704 raise ValueError(
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001705 "Can't write to ZIP archive while an open writing handle exists"
1706 )
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001707
Serhiy Storchaka503f9082016-02-08 00:02:25 +02001708 zinfo = ZipInfo.from_file(filename, arcname)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001709
Serhiy Storchaka503f9082016-02-08 00:02:25 +02001710 if zinfo.is_dir():
1711 zinfo.compress_size = 0
1712 zinfo.CRC = 0
1713 else:
1714 if compress_type is not None:
1715 zinfo.compress_type = compress_type
1716 else:
1717 zinfo.compress_type = self.compression
1718
Bo Baylesce237c72018-01-29 23:54:07 -06001719 if compresslevel is not None:
1720 zinfo._compresslevel = compresslevel
1721 else:
1722 zinfo._compresslevel = self.compresslevel
1723
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001724 if zinfo.is_dir():
1725 with self._lock:
1726 if self._seekable:
1727 self.fp.seek(self.start_dir)
1728 zinfo.header_offset = self.fp.tell() # Start of header bytes
1729 if zinfo.compress_type == ZIP_LZMA:
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001730 # Compressed data includes an end-of-stream (EOS) marker
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001731 zinfo.flag_bits |= 0x02
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001732
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001733 self._writecheck(zinfo)
1734 self._didModify = True
Martin v. Löwis59e47792009-01-24 14:10:07 +00001735
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001736 self.filelist.append(zinfo)
1737 self.NameToInfo[zinfo.filename] = zinfo
1738 self.fp.write(zinfo.FileHeader(False))
1739 self.start_dir = self.fp.tell()
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001740 else:
1741 with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
1742 shutil.copyfileobj(src, dest, 1024*8)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001743
Bo Baylesce237c72018-01-29 23:54:07 -06001744 def writestr(self, zinfo_or_arcname, data,
1745 compress_type=None, compresslevel=None):
Guido van Rossum85825dc2007-08-27 17:03:28 +00001746 """Write a file into the archive. The contents is 'data', which
1747 may be either a 'str' or a 'bytes' instance; if it is a 'str',
1748 it is encoded as UTF-8 first.
1749 'zinfo_or_arcname' is either a ZipInfo instance or
Just van Rossumb083cb32002-12-12 12:23:32 +00001750 the name of the file in the archive."""
Guido van Rossum85825dc2007-08-27 17:03:28 +00001751 if isinstance(data, str):
1752 data = data.encode("utf-8")
Just van Rossumb083cb32002-12-12 12:23:32 +00001753 if not isinstance(zinfo_or_arcname, ZipInfo):
1754 zinfo = ZipInfo(filename=zinfo_or_arcname,
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001755 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001756 zinfo.compress_type = self.compression
Bo Baylesce237c72018-01-29 23:54:07 -06001757 zinfo._compresslevel = self.compresslevel
Serhiy Storchaka46a34922014-09-23 22:40:23 +03001758 if zinfo.filename[-1] == '/':
1759 zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x
1760 zinfo.external_attr |= 0x10 # MS-DOS directory flag
1761 else:
1762 zinfo.external_attr = 0o600 << 16 # ?rw-------
Just van Rossumb083cb32002-12-12 12:23:32 +00001763 else:
1764 zinfo = zinfo_or_arcname
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001765
1766 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001767 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001768 "Attempt to write to ZIP archive that was already closed")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001769 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001770 raise ValueError(
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001771 "Can't write to ZIP archive while an open writing handle exists."
1772 )
1773
1774 if compress_type is not None:
1775 zinfo.compress_type = compress_type
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001776
Bo Baylesce237c72018-01-29 23:54:07 -06001777 if compresslevel is not None:
1778 zinfo._compresslevel = compresslevel
1779
Guido van Rossum85825dc2007-08-27 17:03:28 +00001780 zinfo.file_size = len(data) # Uncompressed size
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001781 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001782 with self.open(zinfo, mode='w') as dest:
1783 dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001784
1785 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001786 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001787 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001788
1789 def close(self):
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001790 """Close the file, and for mode 'w', 'x' and 'a' write the ending
Fred Drake484d7352000-10-02 21:14:52 +00001791 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001792 if self.fp is None:
1793 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001794
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001795 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001796 raise ValueError("Can't close the ZIP file while there is "
1797 "an open writing handle on it. "
1798 "Close the writing handle before closing the zip.")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001799
Antoine Pitrou17babc52012-11-17 23:50:08 +01001800 try:
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001801 if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001802 with self._lock:
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001803 if self._seekable:
Serhiy Storchakaa14f7d22015-01-26 14:01:27 +02001804 self.fp.seek(self.start_dir)
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001805 self._write_end_record()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001806 finally:
1807 fp = self.fp
1808 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001809 self._fpclose(fp)
1810
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001811 def _write_end_record(self):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001812 for zinfo in self.filelist: # write central directory
1813 dt = zinfo.date_time
1814 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1815 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1816 extra = []
1817 if zinfo.file_size > ZIP64_LIMIT \
1818 or zinfo.compress_size > ZIP64_LIMIT:
1819 extra.append(zinfo.file_size)
1820 extra.append(zinfo.compress_size)
1821 file_size = 0xffffffff
1822 compress_size = 0xffffffff
1823 else:
1824 file_size = zinfo.file_size
1825 compress_size = zinfo.compress_size
1826
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001827 if zinfo.header_offset > ZIP64_LIMIT:
1828 extra.append(zinfo.header_offset)
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001829 header_offset = 0xffffffff
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001830 else:
1831 header_offset = zinfo.header_offset
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001832
1833 extra_data = zinfo.extra
1834 min_version = 0
1835 if extra:
1836 # Append a ZIP64 field to the extra's
Miss Islington (bot)efdf3162018-09-17 06:08:45 -07001837 extra_data = _strip_extra(extra_data, (1,))
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001838 extra_data = struct.pack(
1839 '<HH' + 'Q'*len(extra),
1840 1, 8*len(extra), *extra) + extra_data
1841
1842 min_version = ZIP64_VERSION
1843
1844 if zinfo.compress_type == ZIP_BZIP2:
1845 min_version = max(BZIP2_VERSION, min_version)
1846 elif zinfo.compress_type == ZIP_LZMA:
1847 min_version = max(LZMA_VERSION, min_version)
1848
1849 extract_version = max(min_version, zinfo.extract_version)
1850 create_version = max(min_version, zinfo.create_version)
1851 try:
1852 filename, flag_bits = zinfo._encodeFilenameFlags()
1853 centdir = struct.pack(structCentralDir,
1854 stringCentralDir, create_version,
1855 zinfo.create_system, extract_version, zinfo.reserved,
1856 flag_bits, zinfo.compress_type, dostime, dosdate,
1857 zinfo.CRC, compress_size, file_size,
1858 len(filename), len(extra_data), len(zinfo.comment),
1859 0, zinfo.internal_attr, zinfo.external_attr,
1860 header_offset)
1861 except DeprecationWarning:
1862 print((structCentralDir, stringCentralDir, create_version,
1863 zinfo.create_system, extract_version, zinfo.reserved,
1864 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1865 zinfo.CRC, compress_size, file_size,
1866 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1867 0, zinfo.internal_attr, zinfo.external_attr,
1868 header_offset), file=sys.stderr)
1869 raise
1870 self.fp.write(centdir)
1871 self.fp.write(filename)
1872 self.fp.write(extra_data)
1873 self.fp.write(zinfo.comment)
1874
1875 pos2 = self.fp.tell()
1876 # Write end-of-zip-archive record
1877 centDirCount = len(self.filelist)
1878 centDirSize = pos2 - self.start_dir
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001879 centDirOffset = self.start_dir
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001880 requires_zip64 = None
1881 if centDirCount > ZIP_FILECOUNT_LIMIT:
1882 requires_zip64 = "Files count"
1883 elif centDirOffset > ZIP64_LIMIT:
1884 requires_zip64 = "Central directory offset"
1885 elif centDirSize > ZIP64_LIMIT:
1886 requires_zip64 = "Central directory size"
1887 if requires_zip64:
1888 # Need to write the ZIP64 end-of-archive records
1889 if not self._allowZip64:
1890 raise LargeZipFile(requires_zip64 +
1891 " would require ZIP64 extensions")
1892 zip64endrec = struct.pack(
1893 structEndArchive64, stringEndArchive64,
1894 44, 45, 45, 0, 0, centDirCount, centDirCount,
1895 centDirSize, centDirOffset)
1896 self.fp.write(zip64endrec)
1897
1898 zip64locrec = struct.pack(
1899 structEndArchive64Locator,
1900 stringEndArchive64Locator, 0, pos2, 1)
1901 self.fp.write(zip64locrec)
1902 centDirCount = min(centDirCount, 0xFFFF)
1903 centDirSize = min(centDirSize, 0xFFFFFFFF)
1904 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1905
1906 endrec = struct.pack(structEndArchive, stringEndArchive,
1907 0, 0, centDirCount, centDirCount,
1908 centDirSize, centDirOffset, len(self._comment))
1909 self.fp.write(endrec)
1910 self.fp.write(self._comment)
1911 self.fp.flush()
1912
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001913 def _fpclose(self, fp):
1914 assert self._fileRefCnt > 0
1915 self._fileRefCnt -= 1
1916 if not self._fileRefCnt and not self._filePassed:
1917 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001918
1919
1920class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001921 """Class to create ZIP archives with Python library files and packages."""
1922
Georg Brandl8334fd92010-12-04 10:26:46 +00001923 def __init__(self, file, mode="r", compression=ZIP_STORED,
Serhiy Storchaka235c5e02013-11-23 15:55:38 +02001924 allowZip64=True, optimize=-1):
Georg Brandl8334fd92010-12-04 10:26:46 +00001925 ZipFile.__init__(self, file, mode=mode, compression=compression,
1926 allowZip64=allowZip64)
1927 self._optimize = optimize
1928
Christian Tismer59202e52013-10-21 03:59:23 +02001929 def writepy(self, pathname, basename="", filterfunc=None):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001930 """Add all files from "pathname" to the ZIP archive.
1931
Fred Drake484d7352000-10-02 21:14:52 +00001932 If pathname is a package directory, search the directory and
1933 all package subdirectories recursively for all *.py and enter
1934 the modules into the archive. If pathname is a plain
1935 directory, listdir *.py and enter all modules. Else, pathname
1936 must be a Python *.py file and the module will be put into the
Brett Cannonf299abd2015-04-13 14:21:02 -04001937 archive. Added modules are always module.pyc.
Fred Drake484d7352000-10-02 21:14:52 +00001938 This method will compile the module.py into module.pyc if
1939 necessary.
Christian Tismer59202e52013-10-21 03:59:23 +02001940 If filterfunc(pathname) is given, it is called with every argument.
1941 When it is False, the file or directory is skipped.
Fred Drake484d7352000-10-02 21:14:52 +00001942 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001943 pathname = os.fspath(pathname)
Christian Tismer59202e52013-10-21 03:59:23 +02001944 if filterfunc and not filterfunc(pathname):
1945 if self.debug:
Christian Tismer410d9312013-10-22 04:09:28 +02001946 label = 'path' if os.path.isdir(pathname) else 'file'
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001947 print('%s %r skipped by filterfunc' % (label, pathname))
Christian Tismer59202e52013-10-21 03:59:23 +02001948 return
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001949 dir, name = os.path.split(pathname)
1950 if os.path.isdir(pathname):
1951 initname = os.path.join(pathname, "__init__.py")
1952 if os.path.isfile(initname):
1953 # This is a package directory, add it
1954 if basename:
1955 basename = "%s/%s" % (basename, name)
1956 else:
1957 basename = name
1958 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001959 print("Adding package in", pathname, "as", basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001960 fname, arcname = self._get_codename(initname[0:-3], basename)
1961 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001962 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001963 self.write(fname, arcname)
Bernhard M. Wiedemann57750be2018-01-31 11:17:10 +01001964 dirlist = sorted(os.listdir(pathname))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001965 dirlist.remove("__init__.py")
1966 # Add all *.py files and package subdirectories
1967 for filename in dirlist:
1968 path = os.path.join(pathname, filename)
1969 root, ext = os.path.splitext(filename)
1970 if os.path.isdir(path):
1971 if os.path.isfile(os.path.join(path, "__init__.py")):
1972 # This is a package directory, add it
Christian Tismer59202e52013-10-21 03:59:23 +02001973 self.writepy(path, basename,
1974 filterfunc=filterfunc) # Recursive call
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001975 elif ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02001976 if filterfunc and not filterfunc(path):
1977 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001978 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02001979 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001980 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02001981 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001982 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001983 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001984 self.write(fname, arcname)
1985 else:
1986 # This is NOT a package directory, add its files at top level
1987 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001988 print("Adding files from directory", pathname)
Bernhard M. Wiedemann57750be2018-01-31 11:17:10 +01001989 for filename in sorted(os.listdir(pathname)):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001990 path = os.path.join(pathname, filename)
1991 root, ext = os.path.splitext(filename)
1992 if ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02001993 if filterfunc and not filterfunc(path):
1994 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001995 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02001996 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001997 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02001998 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001999 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002000 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002001 self.write(fname, arcname)
2002 else:
2003 if pathname[-3:] != ".py":
Collin Winterce36ad82007-08-30 01:19:48 +00002004 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02002005 'Files added with writepy() must end with ".py"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002006 fname, arcname = self._get_codename(pathname[0:-3], basename)
2007 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002008 print("Adding file", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002009 self.write(fname, arcname)
2010
2011 def _get_codename(self, pathname, basename):
2012 """Return (filename, archivename) for the path.
2013
Fred Drake484d7352000-10-02 21:14:52 +00002014 Given a module name path, return the correct file path and
2015 archive name, compiling if necessary. For example, given
2016 /python/lib/string, return (/python/lib/string.pyc, string).
2017 """
Georg Brandl8334fd92010-12-04 10:26:46 +00002018 def _compile(file, optimize=-1):
2019 import py_compile
2020 if self.debug:
2021 print("Compiling", file)
2022 try:
2023 py_compile.compile(file, doraise=True, optimize=optimize)
Serhiy Storchaka45c43752013-01-29 20:10:28 +02002024 except py_compile.PyCompileError as err:
Georg Brandl8334fd92010-12-04 10:26:46 +00002025 print(err.msg)
2026 return False
2027 return True
2028
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002029 file_py = pathname + ".py"
2030 file_pyc = pathname + ".pyc"
Brett Cannonf299abd2015-04-13 14:21:02 -04002031 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2032 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2033 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
Georg Brandl8334fd92010-12-04 10:26:46 +00002034 if self._optimize == -1:
2035 # legacy mode: use whatever file is present
Brett Cannonf299abd2015-04-13 14:21:02 -04002036 if (os.path.isfile(file_pyc) and
Georg Brandl8334fd92010-12-04 10:26:46 +00002037 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2038 # Use .pyc file.
2039 arcname = fname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002040 elif (os.path.isfile(pycache_opt0) and
2041 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
Georg Brandl8334fd92010-12-04 10:26:46 +00002042 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2043 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002044 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002045 arcname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002046 elif (os.path.isfile(pycache_opt1) and
2047 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2048 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002049 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002050 fname = pycache_opt1
2051 arcname = file_pyc
2052 elif (os.path.isfile(pycache_opt2) and
2053 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2054 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2055 # file name in the archive.
2056 fname = pycache_opt2
2057 arcname = file_pyc
Barry Warsaw28a691b2010-04-17 00:19:56 +00002058 else:
Georg Brandl8334fd92010-12-04 10:26:46 +00002059 # Compile py into PEP 3147 pyc file.
2060 if _compile(file_py):
Brett Cannonf299abd2015-04-13 14:21:02 -04002061 if sys.flags.optimize == 0:
2062 fname = pycache_opt0
2063 elif sys.flags.optimize == 1:
2064 fname = pycache_opt1
2065 else:
2066 fname = pycache_opt2
2067 arcname = file_pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002068 else:
2069 fname = arcname = file_py
2070 else:
2071 # new mode: use given optimization level
2072 if self._optimize == 0:
Brett Cannonf299abd2015-04-13 14:21:02 -04002073 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002074 arcname = file_pyc
2075 else:
Brett Cannonf299abd2015-04-13 14:21:02 -04002076 arcname = file_pyc
2077 if self._optimize == 1:
2078 fname = pycache_opt1
2079 elif self._optimize == 2:
2080 fname = pycache_opt2
2081 else:
2082 msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2083 raise ValueError(msg)
Georg Brandl8334fd92010-12-04 10:26:46 +00002084 if not (os.path.isfile(fname) and
2085 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2086 if not _compile(file_py, optimize=self._optimize):
2087 fname = arcname = file_py
Barry Warsaw28a691b2010-04-17 00:19:56 +00002088 archivename = os.path.split(arcname)[1]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002089 if basename:
2090 archivename = "%s/%s" % (basename, archivename)
2091 return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002092
2093
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002094def main(args=None):
2095 import argparse
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002096
Serhiy Storchaka150cd192017-04-07 18:56:12 +03002097 description = 'A simple command-line interface for zipfile module.'
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002098 parser = argparse.ArgumentParser(description=description)
Serhiy Storchaka150cd192017-04-07 18:56:12 +03002099 group = parser.add_mutually_exclusive_group(required=True)
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002100 group.add_argument('-l', '--list', metavar='<zipfile>',
2101 help='Show listing of a zipfile')
2102 group.add_argument('-e', '--extract', nargs=2,
2103 metavar=('<zipfile>', '<output_dir>'),
2104 help='Extract zipfile into target dir')
2105 group.add_argument('-c', '--create', nargs='+',
2106 metavar=('<name>', '<file>'),
2107 help='Create zipfile from sources')
2108 group.add_argument('-t', '--test', metavar='<zipfile>',
2109 help='Test if a zipfile is valid')
2110 args = parser.parse_args(args)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002111
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002112 if args.test is not None:
2113 src = args.test
2114 with ZipFile(src, 'r') as zf:
Antoine Pitrou17babc52012-11-17 23:50:08 +01002115 badfile = zf.testzip()
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +00002116 if badfile:
2117 print("The following enclosed file is corrupted: {!r}".format(badfile))
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002118 print("Done testing")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002119
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002120 elif args.list is not None:
2121 src = args.list
2122 with ZipFile(src, 'r') as zf:
2123 zf.printdir()
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002124
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002125 elif args.extract is not None:
2126 src, curdir = args.extract
2127 with ZipFile(src, 'r') as zf:
2128 zf.extractall(curdir)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002129
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002130 elif args.create is not None:
2131 zip_name = args.create.pop(0)
2132 files = args.create
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002133
2134 def addToZip(zf, path, zippath):
2135 if os.path.isfile(path):
2136 zf.write(path, zippath, ZIP_DEFLATED)
2137 elif os.path.isdir(path):
Serhiy Storchaka518e71b2014-10-04 13:39:34 +03002138 if zippath:
2139 zf.write(path, zippath)
Bernhard M. Wiedemann57750be2018-01-31 11:17:10 +01002140 for nm in sorted(os.listdir(path)):
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002141 addToZip(zf,
Christian Tismer59202e52013-10-21 03:59:23 +02002142 os.path.join(path, nm), os.path.join(zippath, nm))
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002143 # else: ignore
2144
Serhiy Storchaka8c933102016-10-23 13:32:12 +03002145 with ZipFile(zip_name, 'w') as zf:
2146 for path in files:
Serhiy Storchaka518e71b2014-10-04 13:39:34 +03002147 zippath = os.path.basename(path)
2148 if not zippath:
2149 zippath = os.path.basename(os.path.dirname(path))
2150 if zippath in ('', os.curdir, os.pardir):
2151 zippath = ''
2152 addToZip(zf, path, zippath)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002153
2154if __name__ == "__main__":
2155 main()