blob: 89df90b25209f830ba657c24ffd58888d82c59f9 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Brett Cannonb57a0852013-06-15 17:32:30 -04008import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import sys
10import time
11import stat
12import shutil
13import struct
14import binascii
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020015import threading
Guido van Rossum32abe6f2000-03-31 17:30:02 +000016
17try:
Tim Peterse1190062001-01-15 03:34:38 +000018 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000019 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040020except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000021 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000022 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000023
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020024try:
25 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040026except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020027 bz2 = None
28
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020029try:
30 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040031except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020032 lzma = None
33
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020034__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020035 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Georg Brandl4d540882010-10-28 06:42:33 +000036 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000037
class BadZipFile(Exception):
    """Raised for archives this module cannot read (corrupt or unsupported)."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +000040
41
class LargeZipFile(Exception):
    """Raised while writing when ZIP64 extensions are needed but disabled."""
47
# Pre-3.2 compatibility names: both are plain aliases of BadZipFile.
BadZipfile = BadZipFile
error = BadZipFile
49
Guido van Rossum32abe6f2000-03-31 17:30:02 +000050
# Limits of the classic (non-ZIP64) format.
ZIP64_LIMIT = 0x7FFFFFFF
ZIP_FILECOUNT_LIMIT = 0xFFFF
ZIP_MAX_COMMENT = 0xFFFF

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "version needed to extract" values written into headers
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions within the unpacked end-of-central-directory record.  The last
# two indices are not part of the structure as defined in the spec, but they
# are used internally by this module as a convenience.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE,
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# indexes of entries in the local file header structure
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# indexes of entries in the Zip64 end of central directory record
(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
161
# Header of one "extra field" record: little-endian (id, data length).
_EXTRA_FIELD_STRUCT = struct.Struct('<HH')

def _strip_extra(extra, xids):
    """Return *extra* with every extra-field record whose ID is in *xids* removed.

    *extra* is the raw bytes of a ZIP "extra" area: a sequence of records,
    each a 4-byte ``<HH`` header (id, data length) followed by that many
    data bytes.  *xids* is any container supporting ``in`` for the IDs to drop.

    If no record matches, the original *extra* object is returned unchanged.
    Otherwise a new bytes object is built from the records that are kept,
    including anything that follows the last removed record (kept records
    and any trailing bytes too short to form a header).
    """
    unpack = _EXTRA_FIELD_STRUCT.unpack
    modified = False
    buffer = []
    # `start` marks the beginning of the run of bytes not yet copied out.
    start = i = 0
    while i + 4 <= len(extra):
        xid, xlen = unpack(extra[i : i + 4])
        j = i + 4 + xlen
        if xid in xids:
            # Flush the kept run that precedes this record, then skip it.
            if i != start:
                buffer.append(extra[start : i])
            start = j
            modified = True
        i = j
    if not modified:
        return extra
    # Keep whatever follows the last removed record; previously this tail
    # was silently dropped.
    if start != len(extra):
        buffer.append(extra[start:])
    return b''.join(buffer)
182
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An OSError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        found = bool(_EndRecData(fp))  # truthy => file has correct magic number
    except OSError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000190
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    *filename* is either a path, which is opened in binary mode, or an
    object with a ``read`` attribute, which is probed directly.  Any
    OSError (for example an unreadable path) yields False.
    """
    is_zip = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as stream:
                is_zip = _check_zipfile(stream)
        else:
            is_zip = _check_zipfile(fp=filename)
    except OSError:
        pass
    return is_zip
206
def _EndRecData64(fpin, offset, endrec):
    """Overlay ZIP64 end-of-archive data onto *endrec*, if such data exists.

    *offset* is the position of the classic end-of-central-directory record
    relative to the end of the file (a non-positive number).  *endrec* is the
    list produced from that record; it is updated in place and returned.  If
    no ZIP64 locator/record with the right signatures is found, *endrec* is
    returned untouched.

    Raises BadZipFile for archives that span multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    raw = fpin.read(sizeEndCentDir64Locator)
    if len(raw) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, _reloff, disks = struct.unpack(structEndArchive64Locator, raw)
    if magic != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data': the record sits directly before
    # the locator.
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    raw = fpin.read(sizeEndCentDir64)
    if len(raw) != sizeEndCentDir64:
        return endrec
    record = struct.unpack(structEndArchive64, raw)
    if record[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    for ecd_index, cd64_index in (
            (_ECD_SIGNATURE, _CD64_SIGNATURE),
            (_ECD_DISK_NUMBER, _CD64_DISK_NUMBER),
            (_ECD_DISK_START, _CD64_DISK_NUMBER_START),
            (_ECD_ENTRIES_THIS_DISK, _CD64_NUMBER_ENTRIES_THIS_DISK),
            (_ECD_ENTRIES_TOTAL, _CD64_NUMBER_ENTRIES_TOTAL),
            (_ECD_SIZE, _CD64_DIRECTORY_SIZE),
            (_ECD_OFFSET, _CD64_OFFSET_START_CENTDIR)):
        endrec[ecd_index] = record[cd64_index]
    return endrec
248
249
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list holding the eight unpacked fields of the ZIP
    "End of central dir" record, followed by the archive comment (bytes)
    and the file offset at which the record starts.  The list is passed
    through _EndRecData64, so ZIP64 values replace the classic ones when a
    ZIP64 record is present.  None means no usable record was found.
    """
    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: an archive without a comment ends with the record itself.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    if (len(tail) == sizeEndCentDir
            and tail[0:4] == stringEndArchive
            and tail[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, tail))
        # Append a blank comment and record start offset
        endrec += [b"", filesize - sizeEndCentDir]
        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    search_base = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(search_base, 0)
    window = fpin.read()
    pos = window.rfind(stringEndArchive)
    if pos < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    raw = window[pos:pos + sizeEndCentDir]
    if len(raw) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, raw))
    comment_len = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    comment_begin = pos + sizeEndCentDir
    endrec.append(window[comment_begin:comment_begin + comment_len])
    endrec.append(search_base + pos)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, search_base + pos - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000309
Fred Drake484d7352000-10-02 21:14:52 +0000310
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Initialise the entry with a normalised name and default metadata.

        *filename* is cut at the first NUL character and, on platforms whose
        path separator is not "/", has that separator replaced by "/".
        *date_time* is a (year, month, day, hour, min, sec) sequence.

        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Return a compact description listing only the informative fields."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # High 16 bits carry the Unix mode, low 16 bits the remaining flags.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file (local) header as bytes.

        *zip64* forces (True) or forbids (False) the ZIP64 extra field; with
        None it is used exactly when a size exceeds ZIP64_LIMIT.

        Side effect: extract_version and create_version are raised to the
        minimum version the chosen features require.

        Raises LargeZipFile if a size exceeds ZIP64_LIMIT while *zip64* is
        false.
        """
        dt = self.date_time
        # Pack the timestamp into the MS-DOS date and time words.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        ASCII names are written as-is; any other name is encoded as UTF-8
        and flag bit 0x800 is added to the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 record (ID 0x0001) found in ``self.extra``.

        For each of file_size, compress_size and header_offset that holds a
        "see ZIP64" sentinel value, the next 64-bit value from the record
        replaces it.  Records with other IDs are skipped.

        Raises BadZipFile if a record overruns the extra area, if the ZIP64
        record has an unexpected length, or if it holds fewer values than
        the sentinels require.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        self.file_size = counts[idx]
                        idx += 1

                    if self.compress_size == 0xFFFFFFFF:
                        self.compress_size = counts[idx]
                        idx += 1

                    if self.header_offset == 0xffffffff:
                        self.header_offset = counts[idx]
                        idx += 1
                except IndexError:
                    # The record is too short for the values the header
                    # says it must carry: report corruption, not IndexError.
                    raise BadZipFile("Corrupt extra field %04x (size=%d)"
                                     % (tp, ln)) from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).

        With strict_timestamps false, modification years before 1980 or
        after 2107 are clamped to the nearest supported timestamp instead of
        being passed through unchanged.
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        # endswith() rather than [-1] so an empty name is False, not IndexError.
        return self.filename.endswith('/')
533
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000534
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300535# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
536# internal keys. We noticed that a direct implementation is faster than
537# relying on binascii.crc32().
Thomas Wouterscf297e42007-02-23 15:07:44 +0000538
# Lazily built 256-entry CRC-32 lookup table (filled in by _ZipDecrypter).
_crctable = None
def _gen_crc(crc):
    """Return *crc* after eight right-shift steps with polynomial 0xEDB88320."""
    for _ in range(8):
        low_bit_set = crc & 1
        crc >>= 1
        if low_bit_set:
            crc ^= 0xEDB88320
    return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000547
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300548# ZIP supports a password-based form of encryption. Even though known
549# plaintext attacks have been found against it, it is still useful
550# to be able to get data out of such a file.
551#
552# Usage:
553# zd = _ZipDecrypter(mypwd)
554# plain_bytes = zd(cypher_bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000555
def _ZipDecrypter(pwd):
    """Return a function that decrypts successive chunks of encrypted bytes.

    *pwd* is iterated as integers (e.g. a bytes object) to seed three 32-bit
    keys.  The returned callable keeps evolving those keys, so chunks must be
    passed to it in stream order.
    """
    key0 = 0x12345678
    key1 = 0x23456789
    key2 = 0x34567890

    # Build the shared CRC table on first use.
    global _crctable
    if _crctable is None:
        _crctable = [_gen_crc(n) for n in range(256)]
    crctable = _crctable

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]

    def update_keys(c):
        """Mix one plaintext byte into the three keys."""
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
        key1 = (key1 * 0x08088405 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    for pwd_byte in pwd:
        update_keys(pwd_byte)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        for byte in data:
            k = key2 | 2
            byte ^= ((k * (k ^ 1)) >> 8) & 0xFF
            # The keys advance on the decrypted byte, not the cipher byte.
            update_keys(byte)
            plain.append(byte)
        return bytes(plain)

    return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000592
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200593
class LZMACompressor:
    """Raw LZMA1 compressor whose first output is prefixed by a small header.

    The header is ``struct.pack('<BBH', 9, 4, len(props))`` followed by the
    encoded LZMA1 filter properties; it is emitted once, by the first call
    to compress() or flush().
    """

    def __init__(self):
        # The real compressor is created lazily together with the header.
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[filter_spec])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*, prepending the header on first use."""
        if self._comp is not None:
            return self._comp.compress(data)
        header = self._init()
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream, prepending the header if nothing was written."""
        if self._comp is not None:
            return self._comp.flush()
        header = self._init()
        return header + self._comp.flush()
615
616
class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by a properties header.

    Input is buffered until more than ``4 + psize`` bytes are available,
    where ``psize`` is the little-endian 16-bit value at offset 2; the
    properties at ``[4:4 + psize]`` then configure the raw decompressor.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''   # header bytes collected so far
        self.eof = False

    def decompress(self, data):
        """Return decompressed bytes for *data* (b'' while still buffering)."""
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            (psize,) = struct.unpack('<H', pending[2:4])
            body_start = 4 + psize
            if len(pending) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[body_start:]
            # The buffer is only needed until the header has been parsed.
            del self._unconsumed

        chunk = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return chunk
643
644
# Human-readable names of ZIP compression method numbers, used in reprs and
# error messages.  Listed here whether or not the method can be decoded.
compressor_names = {
    0: 'store',
    1: 'shrink',
    **dict.fromkeys((2, 3, 4, 5), 'reduce'),
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
664
def _check_compression(compression):
    """Validate that *compression* is a method this module can use.

    Returns None on success.  Raises RuntimeError when the method is known
    but its backing module could not be imported, and NotImplementedError
    for any other method number.
    """
    if compression == ZIP_STORED:
        return
    # (method, backing module or None, message used when it is missing)
    backends = (
        (ZIP_DEFLATED, zlib,
         "Compression requires the (missing) zlib module"),
        (ZIP_BZIP2, bz2,
         "Compression requires the (missing) bz2 module"),
        (ZIP_LZMA, lzma,
         "Compression requires the (missing) lzma module"),
    )
    for method, module, message in backends:
        if compression == method:
            if not module:
                raise RuntimeError(message)
            return
    raise NotImplementedError("That compression method is not supported")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200682
683
def _get_compressor(compress_type, compresslevel=None):
    """Return a fresh compressor object for *compress_type*, or None.

    *compresslevel* is forwarded to zlib/bz2 when given; it is ignored for
    ZIP_LZMA.  Any other compression type yields None.
    """
    if compress_type == ZIP_DEFLATED:
        level = (zlib.Z_DEFAULT_COMPRESSION if compresslevel is None
                 else compresslevel)
        # Negative wbits: raw deflate stream without zlib header/trailer.
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()
    return None
698
699
def _get_decompressor(compress_type):
    """Return a fresh decompressor object for *compress_type*.

    ZIP_STORED needs no decompressor, so None is returned for it.
    Unsupported types raise NotImplementedError; the message includes the
    method's name when it is listed in compressor_names.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    method_name = compressor_names.get(compress_type)
    if method_name:
        raise NotImplementedError("compression type %d (%s)" % (compress_type, method_name))
    raise NotImplementedError("compression type %d" % (compress_type,))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200715
716
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200717class _SharedFile:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300718 def __init__(self, file, pos, close, lock, writing):
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200719 self._file = file
720 self._pos = pos
721 self._close = close
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200722 self._lock = lock
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300723 self._writing = writing
John Jolly066df4f2018-01-30 01:51:35 -0700724 self.seekable = file.seekable
725 self.tell = file.tell
726
727 def seek(self, offset, whence=0):
728 with self._lock:
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200729 if self._writing():
John Jolly066df4f2018-01-30 01:51:35 -0700730 raise ValueError("Can't reposition in the ZIP file while "
731 "there is an open writing handle on it. "
732 "Close the writing handle before trying to read.")
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200733 self._file.seek(offset, whence)
John Jolly066df4f2018-01-30 01:51:35 -0700734 self._pos = self._file.tell()
735 return self._pos
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200736
737 def read(self, n=-1):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200738 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300739 if self._writing():
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300740 raise ValueError("Can't read from the ZIP file while there "
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300741 "is an open writing handle on it. "
742 "Close the writing handle before trying to read.")
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200743 self._file.seek(self._pos)
744 data = self._file.read(n)
745 self._pos = self._file.tell()
746 return data
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200747
748 def close(self):
749 if self._file is not None:
750 fileobj = self._file
751 self._file = None
752 self._close(fileobj)
753
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200754# Provide the tell method for unseekable stream
755class _Tellable:
756 def __init__(self, fp):
757 self.fp = fp
758 self.offset = 0
759
760 def write(self, data):
761 n = self.fp.write(data)
762 self.offset += n
763 return n
764
765 def tell(self):
766 return self.offset
767
768 def flush(self):
769 self.fp.flush()
770
771 def close(self):
772 self.fp.close()
773
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200774
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.

    Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses matter:
    # "1 << 31 - 1" parses as "1 << 30" because "-" binds tighter than "<<".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Set up reading of one member.

        fileobj:       file object positioned at the start of the member's
                       (possibly encrypted/compressed) data.
        mode:          stored as the ``mode`` attribute.
        zipinfo:       object providing compress_type, compress_size,
                       file_size, filename and (optionally) CRC.
        decrypter:     optional callable applied to every chunk read.
        close_fileobj: whether close() should also close *fileobj*.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        # Remember the starting state so seek() can rewind.  Any missing
        # attribute (e.g. fileobj without seekable()) leaves the member
        # non-seekable.
        self._seekable = False
        try:
            if fileobj.seekable():
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Push the bytes just read back in front of the buffer.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than asked for: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n=-1):
        """Read up to n bytes with at most one read() system call.

        If n is omitted, None, or negative, the buffered bytes plus the
        result of the first non-empty underlying read are returned.
        """

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read (and decrypt) raw member bytes: at least MIN_READ_SIZE, but
        # never past the end of the member's compressed data.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            # The archive ended before the member's data did.
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        return self._seekable

    def seek(self, offset, whence=0):
        """Move to a new position in the uncompressed data and return it.

        The target is clamped to [0, file size].  Positions inside the
        current buffer are reached by adjusting the buffer offset; forward
        moves read and discard data; backward moves restart decompression
        from the beginning of the member.

        Raises io.UnsupportedOperation if the member is not seekable and
        ValueError for an invalid *whence*.
        """
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            # NOTE(review): the decrypter is not rewound here and
            # _compress_left is restored without the 12-byte adjustment made
            # in __init__ -- backward seeks on encrypted members look
            # unsupported; confirm against callers.
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos

        # Skip forward by reading and discarding bounded chunks.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        """Return the current position in the uncompressed data.

        Raises io.UnsupportedOperation if the member is not seekable.
        """
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos
1071
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001072
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001073class _ZipWriteFile(io.BufferedIOBase):
1074 def __init__(self, zf, zinfo, zip64):
1075 self._zinfo = zinfo
1076 self._zip64 = zip64
1077 self._zipfile = zf
Bo Baylesce237c72018-01-29 23:54:07 -06001078 self._compressor = _get_compressor(zinfo.compress_type,
1079 zinfo._compresslevel)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001080 self._file_size = 0
1081 self._compress_size = 0
1082 self._crc = 0
1083
1084 @property
1085 def _fileobj(self):
1086 return self._zipfile.fp
1087
1088 def writable(self):
1089 return True
1090
1091 def write(self, data):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001092 if self.closed:
1093 raise ValueError('I/O operation on closed file.')
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001094 nbytes = len(data)
1095 self._file_size += nbytes
1096 self._crc = crc32(data, self._crc)
1097 if self._compressor:
1098 data = self._compressor.compress(data)
1099 self._compress_size += len(data)
1100 self._fileobj.write(data)
1101 return nbytes
1102
1103 def close(self):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001104 if self.closed:
1105 return
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001106 super().close()
1107 # Flush any data from the compressor, and update header info
1108 if self._compressor:
1109 buf = self._compressor.flush()
1110 self._compress_size += len(buf)
1111 self._fileobj.write(buf)
1112 self._zinfo.compress_size = self._compress_size
1113 else:
1114 self._zinfo.compress_size = self._file_size
1115 self._zinfo.CRC = self._crc
1116 self._zinfo.file_size = self._file_size
1117
1118 # Write updated header info
1119 if self._zinfo.flag_bits & 0x08:
1120 # Write CRC and file sizes after the file data
1121 fmt = '<LQQ' if self._zip64 else '<LLL'
1122 self._fileobj.write(struct.pack(fmt, self._zinfo.CRC,
1123 self._zinfo.compress_size, self._zinfo.file_size))
1124 self._zipfile.start_dir = self._fileobj.tell()
1125 else:
1126 if not self._zip64:
1127 if self._file_size > ZIP64_LIMIT:
1128 raise RuntimeError('File size unexpectedly exceeded ZIP64 '
1129 'limit')
1130 if self._compress_size > ZIP64_LIMIT:
1131 raise RuntimeError('Compressed size unexpectedly exceeded '
1132 'ZIP64 limit')
1133 # Seek backwards and write file header (which will now include
1134 # correct CRC and file sizes)
1135
1136 # Preserve current position in file
1137 self._zipfile.start_dir = self._fileobj.tell()
1138 self._fileobj.seek(self._zinfo.header_offset)
1139 self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1140 self._fileobj.seek(self._zipfile.start_dir)
1141
1142 self._zipfile._writing = False
1143
1144 # Successfully written: Add file to our caches
1145 self._zipfile.filelist.append(self._zinfo)
1146 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1147
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001148class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001149 """ Class with methods to open, read, write, close, list zip files.
1150
Bo Baylesce237c72018-01-29 23:54:07 -06001151 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1152 compresslevel=None)
Tim Petersa19a1682001-03-29 04:36:09 +00001153
Fred Drake3d9091e2001-03-26 15:49:24 +00001154 file: Either the path to the file, or a file-like object.
1155 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001156 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1157 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001158 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1159 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001160 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1161 needed, otherwise it will raise an exception when this would
1162 be necessary.
Bo Baylesce237c72018-01-29 23:54:07 -06001163 compresslevel: None (default for the given compression type) or an integer
1164 specifying the level to pass to the compressor.
1165 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1166 When using ZIP_DEFLATED integers 0 through 9 are accepted.
1167 When using ZIP_BZIP2 integers 1 through 9 are accepted.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001168
Fred Drake3d9091e2001-03-26 15:49:24 +00001169 """
Fred Drake484d7352000-10-02 21:14:52 +00001170
Fred Drake90eac282001-02-28 05:29:34 +00001171 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001172 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001173
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
             compresslevel=None, *, strict_timestamps=True):
    """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
    or append 'a'.

    file may be a path (str or os.PathLike), which is opened here, or an
    already-open file-like object, which is used as is.

    Raises ValueError for an unknown mode, and whatever
    _check_compression() raises for an unusable compression method.  If
    setting up the archive fails after the file object is in place, the
    file is released via self._fpclose() and the exception is re-raised.
    """
    if mode not in ('r', 'w', 'x', 'a'):
        raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.compresslevel = compresslevel
    self.mode = mode
    self.pwd = None
    self._comment = b''
    self._strict_timestamps = strict_timestamps

    # Check if we were passed a file-like object
    if isinstance(file, os.PathLike):
        file = os.fspath(file)
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        # Maps the ZipFile mode to the first file mode to try, and each
        # file mode to the fallback tried when opening with it fails.
        modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                    'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
        filemode = modeDict[mode]
        while True:
            try:
                self.fp = io.open(file, filemode)
            except OSError:
                if filemode in modeDict:
                    # Retry with the next fallback mode.
                    filemode = modeDict[filemode]
                    continue
                # No fallback left: propagate the original error.
                raise
            break
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    self._fileRefCnt = 1
    self._lock = threading.RLock()
    self._seekable = True
    self._writing = False

    try:
        if mode == 'r':
            self._RealGetContents()
        elif mode in ('w', 'x'):
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
            try:
                self.start_dir = self.fp.tell()
            except (AttributeError, OSError):
                # No usable tell(): wrap the stream so positions can be
                # tracked by counting written bytes.
                self.fp = _Tellable(self.fp)
                self.start_dir = 0
                self._seekable = False
            else:
                # Some file-like objects can provide tell() but not seek()
                try:
                    self.fp.seek(self.start_dir)
                except (AttributeError, OSError):
                    self._seekable = False
        elif mode == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                self.start_dir = self.fp.tell()
        else:
            # Not reachable after the mode check at the top.
            raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
    except:
        # Deliberately bare: release the file on any failure, then
        # re-raise the exception unchanged.
        fp = self.fp
        self.fp = None
        self._fpclose(fp)
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001263
Ezio Melottifaa6b7f2009-12-30 12:34:59 +00001264 def __enter__(self):
1265 return self
1266
1267 def __exit__(self, type, value, traceback):
1268 self.close()
1269
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001270 def __repr__(self):
1271 result = ['<%s.%s' % (self.__class__.__module__,
1272 self.__class__.__qualname__)]
1273 if self.fp is not None:
1274 if self._filePassed:
1275 result.append(' file=%r' % self.fp)
1276 elif self.filename is not None:
1277 result.append(' filename=%r' % self.filename)
1278 result.append(' mode=%r' % self.mode)
1279 else:
1280 result.append(' [closed]')
1281 result.append('>')
1282 return ''.join(result)
1283
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record, then parses each central
    directory entry into a ZipInfo that is appended to self.filelist and
    indexed in self.NameToInfo.  Raises BadZipFile if the file does not
    look like a ZIP archive or its central directory is damaged, and
    NotImplementedError if an entry needs a newer ZIP version than
    supported.
    """
    archive = self.fp
    try:
        endrec = _EndRecData(archive)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    archive.seek(self.start_dir, 0)
    # Parse the directory from an in-memory copy.
    directory = io.BytesIO(archive.read(size_cd))
    consumed = 0
    while consumed < size_cd:
        raw_entry = directory.read(sizeCentralDir)
        if len(raw_entry) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, raw_entry)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        name_bytes = directory.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        # Flag bit 0x800 selects the UTF-8 file names extension; otherwise
        # the historical ZIP filename encoding applies.
        filename = name_bytes.decode('utf-8' if flags & 0x800 else 'cp437')
        # Create ZipInfo instance to store file information
        info = ZipInfo(filename)
        info.extra = directory.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        info.comment = directory.read(centdir[_CD_COMMENT_LENGTH])
        info.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (info.create_version, info.create_system, info.extract_version,
         info.reserved, info.flag_bits, info.compress_type, t, d,
         info.CRC, info.compress_size, info.file_size) = centdir[1:12]
        if info.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (info.extract_version / 10))
        info.volume, info.internal_attr, info.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        info._raw_time = t
        info.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                          t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        info._decodeExtra()
        # Shift by the amount of data that precedes the archive.
        info.header_offset = info.header_offset + concat
        self.filelist.append(info)
        self.NameToInfo[info.filename] = info

        # update total bytes read from central directory
        consumed += (sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", consumed)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001360
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001361
def namelist(self):
    """Return the names of all archive members as a new list.

    Names appear in the same order as the entries of self.filelist.
    """
    names = []
    for entry in self.filelist:
        names.append(entry.filename)
    return names
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001365
def infolist(self):
    """Return the ZipInfo entries of the archive.

    The object returned is self.filelist itself, not a copy.
    """
    entries = self.filelist
    return entries
1370
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    A header line is written first, then one line per entry of
    self.filelist giving its name, modification time and file_size.
    'file' is passed straight through to print().
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for member in self.filelist:
        # Only the first six date_time fields take part in the stamp.
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % member.date_time[:6]
        row = "%-46s %s %12d" % (member.filename, stamp, member.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001379
1380 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001381 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001382 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001383 for zinfo in self.filelist:
1384 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001385 # Read by chunks, to avoid an OverflowError or a
1386 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001387 with self.open(zinfo.filename, "r") as f:
1388 while f.read(chunk_size): # Check CRC-32
1389 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001390 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001391 return zinfo.filename
1392
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError when self.NameToInfo has no entry for 'name'.
    """
    entry = self.NameToInfo.get(name)
    if entry is not None:
        return entry

    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001401
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A falsy 'pwd' (for example None or b"") clears the default.  Any
    other value must be a bytes instance, otherwise TypeError is raised.
    """
    if not pwd:
        self.pwd = None
        return
    if not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd
Thomas Wouterscf297e42007-02-23 15:07:44 +00001410
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Only bytes are accepted as an archive comment.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
    # A comment longer than ZIP_MAX_COMMENT is cut down to that length,
    # and a warning is issued about it.
    too_long = len(comment) > ZIP_MAX_COMMENT
    if too_long:
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT, stacklevel=2)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    # Record that the archive changed; close() checks this flag before
    # writing the ending records.
    self._didModify = True
1428
def read(self, name, pwd=None):
    """Return the complete contents of the member 'name'.

    The member is opened with self.open(name, "r", pwd), read in one
    call, and closed again before the data is returned.
    """
    with self.open(name, "r", pwd) as member:
        data = member.read()
    return data
Guido van Rossumd8faa362007-04-27 19:54:29 +00001433
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Return file-like object for 'name'.

    name is a string for the file name within the ZIP file, or a ZipInfo
    object.

    mode should be 'r' to read a file already in the ZIP file, or 'w' to
    write to a file newly added to the archive.

    pwd is the password to decrypt files (only used for reading).

    When writing, if the file size is not known in advance but may exceed
    2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
    files.  If the size is known in advance, it is best to pass a ZipInfo
    instance for name, with zinfo.file_size set.

    Raises ValueError for a mode other than 'r' or 'w', for a password
    given together with mode 'w', for an archive that was already
    closed, and for reading while a writing handle is open; TypeError
    for a non-bytes password; KeyError (from getinfo) for an unknown
    member name; BadZipFile for a truncated or inconsistent local file
    header; NotImplementedError when flag bit 5 or 6 is set; and
    RuntimeError for a missing or wrong password on an encrypted member.
    """
    # Argument validation happens before the archive itself is touched.
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    if pwd and (mode == "w"):
        raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    # Make sure we have an info object
    if isinstance(name, ZipInfo):
        # 'name' is already an info object
        zinfo = name
    elif mode == 'w':
        # New member: start from the archive-wide compression settings.
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
    else:
        # Get info object for name
        zinfo = self.getinfo(name)

    # Writing is delegated entirely to _open_to_write().
    if mode == 'w':
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                "is an open writing handle on it. "
                "Close the writing handle before trying to read.")

    # Open for reading:
    # Count one more user of self.fp; _fpclose() decrements the count.
    self._fileRefCnt += 1
    # The wrapper starts at the member's local header offset.
    # NOTE(review): presumably _SharedFile calls self._fpclose when it is
    # closed -- confirm in _SharedFile.
    zef_file = _SharedFile(self.fp, zinfo.header_offset,
                           self._fpclose, self._lock, lambda: self._writing)
    try:
        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, fheader)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        # The raw name is kept for the consistency check below; the extra
        # field is read only to move past it.
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        # The name in the local header must match the directory entry.
        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            # An explicit pwd argument wins; otherwise fall back to the
            # archive default (see setpassword()).
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = zd(header[0:12])
            if zinfo.flag_bits & 0x8:
                # compare against the file time from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file %r" % name)

        return ZipExtFile(zef_file, mode, zinfo, zd, True)
    except:
        # Deliberately bare: whatever went wrong, close the shared
        # wrapper before letting the exception propagate unchanged.
        zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001547
def _open_to_write(self, zinfo, force_zip64=False):
    """Write the local header for 'zinfo' and return a writer for its data.

    Resets the size/CRC bookkeeping and flag bits on 'zinfo', records its
    header offset, runs _writecheck(), writes zinfo.FileHeader(zip64) to
    self.fp, marks the archive as having an open writing handle, and
    returns a _ZipWriteFile for the member.

    Raises ValueError if force_zip64 is requested while self._allowZip64
    is false, or if another writing handle is already open.
    """
    if force_zip64 and not self._allowZip64:
        raise ValueError(
            "force_zip64 is True, but allowZip64 was False when opening "
            "the ZIP file."
        )
    if self._writing:
        raise ValueError("Can't write to the ZIP file while there is "
                         "another write handle open on it. "
                         "Close the first handle before opening another.")

    # Sizes and CRC are overwritten with correct data after processing the file
    # A file_size already present on zinfo is kept: it feeds the ZIP64
    # decision below.
    if not hasattr(zinfo, 'file_size'):
        zinfo.file_size = 0
    zinfo.compress_size = 0
    zinfo.CRC = 0

    # Flag bits are rebuilt from scratch for the new member.
    zinfo.flag_bits = 0x00
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        zinfo.flag_bits |= 0x02
    if not self._seekable:
        # NOTE(review): bit 3 is set only for non-seekable output;
        # presumably it announces that sizes/CRC follow the data --
        # confirm against ZipInfo.FileHeader() and _ZipWriteFile.
        zinfo.flag_bits |= 0x08

    if not zinfo.external_attr:
        zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

    # Compressed size can be larger than uncompressed size
    # (hence the 5% margin applied to file_size before comparing).
    zip64 = self._allowZip64 and \
            (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

    # On seekable output, position at self.start_dir first; the member's
    # header offset is wherever the file pointer then is.
    if self._seekable:
        self.fp.seek(self.start_dir)
    zinfo.header_offset = self.fp.tell()

    self._writecheck(zinfo)
    self._didModify = True

    self.fp.write(zinfo.FileHeader(zip64))

    # From here on other reads/writes on the archive are refused until
    # this flag is cleared again (not done in this method).
    self._writing = True
    return _ZipWriteFile(self, zinfo, zip64)
1590
def extract(self, member, path=None, pwd=None):
    """Extract a single member and return the result of _extract_member().

    'member' may be a filename or a ZipInfo object.  The member is
    written below 'path'; when 'path' is None the current working
    directory is used, otherwise 'path' is converted with os.fspath().
    'pwd' is forwarded unchanged.
    """
    destination = os.getcwd() if path is None else os.fspath(path)
    return self._extract_member(member, destination, pwd)
1603
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members below one directory.

    'members' defaults to everything returned by namelist(); when given
    it must be a subset of that list.  'path' defaults to the current
    working directory, otherwise it is converted with os.fspath().
    Each member is handed to _extract_member() together with 'pwd'.
    """
    selected = self.namelist() if members is None else members
    destination = os.getcwd() if path is None else os.fspath(path)

    for member in selected:
        self._extract_member(member, destination, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001620
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001621 @classmethod
1622 def _sanitize_windows_name(cls, arcname, pathsep):
1623 """Replace bad characters and remove trailing dots from parts."""
1624 table = cls._windows_illegal_name_trans_table
1625 if not table:
1626 illegal = ':<>|"?*'
1627 table = str.maketrans(illegal, '_' * len(illegal))
1628 cls._windows_illegal_name_trans_table = table
1629 arcname = arcname.translate(table)
1630 # remove trailing dots
1631 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1632 # rejoin, removing empty parts.
1633 arcname = pathsep.join(x for x in arcname if x)
1634 return arcname
1635
def _extract_member(self, member, targetpath, pwd):
    """Extract 'member' below the directory 'targetpath'.

    'member' is a ZipInfo object or a name that is looked up with
    getinfo().  Returns the normalized path of the file or directory
    that was written.
    """
    if not isinstance(member, ZipInfo):
        member = self.getinfo(member)

    sep = os.path.sep
    # Build the destination pathname: archive names use forward slashes,
    # which are turned into the platform specific separator.
    relpath = member.filename.replace('/', sep)

    altsep = os.path.altsep
    if altsep:
        relpath = relpath.replace(altsep, sep)
    # Interpret an absolute pathname as relative: drop the drive letter or
    # UNC path, redundant separators, and "." and ".." components.
    _, relpath = os.path.splitdrive(relpath)
    skipped = ('', os.path.curdir, os.path.pardir)
    kept = [part for part in relpath.split(sep) if part not in skipped]
    relpath = sep.join(kept)
    if sep == '\\':
        # filter illegal characters on Windows
        relpath = self._sanitize_windows_name(relpath, sep)

    targetpath = os.path.normpath(os.path.join(targetpath, relpath))

    # Create all upper directories if necessary.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    # Directory members only need the directory itself to exist.
    if member.is_dir():
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    with self.open(member, pwd=pwd) as source, \
         open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1677
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Warns about a duplicate member name, raises ValueError when the
    archive mode does not allow writing or the archive is closed,
    validates the compression type via _check_compression(), and raises
    LargeZipFile when ZIP64 would be needed but self._allowZip64 is off.
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ('w', 'x', 'a'):
        raise ValueError("write() requires mode 'w', 'x', or 'a'")
    if not self.fp:
        raise ValueError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)

    # The remaining limits only matter when ZIP64 is not allowed.
    if self._allowZip64:
        return
    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        problem = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        problem = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        problem = "Zipfile size"
    else:
        return
    raise LargeZipFile(problem +
                       " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001700
def write(self, filename, arcname=None,
          compress_type=None, compresslevel=None):
    """Add the file or directory 'filename' to the archive.

    The member is stored under 'arcname' (both are handed to
    ZipInfo.from_file()).  For regular files 'compress_type' and
    'compresslevel' override the archive defaults when not None; for
    directories only a header is written.

    Raises ValueError if the archive is closed or a writing handle is
    still open.
    """
    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname,
                              strict_timestamps=self._strict_timestamps)

    if zinfo.is_dir():
        zinfo.compress_size = 0
        zinfo.CRC = 0
    else:
        # Explicit arguments win over the archive-wide defaults.
        zinfo.compress_type = (self.compression if compress_type is None
                               else compress_type)
        zinfo._compresslevel = (self.compresslevel if compresslevel is None
                                else compresslevel)

    if not zinfo.is_dir():
        # Regular file: stream the contents through a writing handle.
        with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
            shutil.copyfileobj(src, dest, 1024*8)
        return

    # Directory: only a local header is written, under the archive lock.
    with self._lock:
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        # The central directory will now start after this header.
        self.start_dir = self.fp.tell()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001749
def writestr(self, zinfo_or_arcname, data,
             compress_type=None, compresslevel=None):
    """Write 'data' into the archive as a single member.

    'data' may be either a 'str' or a 'bytes' instance; if it is a
    'str', it is encoded as UTF-8 first.  'zinfo_or_arcname' is either a
    ZipInfo instance or the name of the file in the archive.  When a
    name is given, a ZipInfo is created with the current local time and
    the archive's default compression settings; it gets directory
    attributes if the name ends with '/', regular-file attributes
    otherwise.

    'compress_type' and 'compresslevel', when not None, override the
    values carried by the ZipInfo.

    Raises ValueError if the archive is closed or if another writing
    handle is still open.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
        # endswith() instead of filename[-1]: an empty name must not
        # fail here with an unrelated IndexError.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    if compresslevel is not None:
        zinfo._compresslevel = compresslevel

    zinfo.file_size = len(data)            # Uncompressed size
    # The whole member is written while holding the archive lock.
    with self._lock:
        with self.open(zinfo, mode='w') as dest:
            dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001790
1791 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001792 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001793 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001794
def close(self):
    """Close the file, and for mode 'w', 'x' and 'a' write the ending
    records.

    Does nothing when the archive is already closed (self.fp is None).
    Raises ValueError while a writing handle is still open.  The ending
    records are written only if the archive was modified; self.fp is
    reset and released through _fpclose() even if writing them fails.
    """
    if self.fp is None:
        return

    if self._writing:
        raise ValueError("Can't close the ZIP file while there is "
                         "an open writing handle on it. "
                         "Close the writing handle before closing the zip.")

    try:
        if self.mode in ('w', 'x', 'a') and self._didModify:
            # write ending records
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        handle, self.fp = self.fp, None
        self._fpclose(handle)
1816
def _write_end_record(self):
    """Write the central directory and the end-of-archive records.

    One central directory entry is written to self.fp for every ZipInfo
    in self.filelist, followed by the ZIP64 end-of-archive record and
    locator when a limit is exceeded, then the regular end-of-archive
    record and the archive comment.  The file is flushed at the end.

    Raises LargeZipFile when ZIP64 records would be required but
    self._allowZip64 is false.
    """
    for zinfo in self.filelist:         # write central directory
        # Pack the date_time tuple into the 16-bit DOS date and time
        # fields (seconds are stored halved).
        dt = zinfo.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        # 'extra' collects the values that do not fit the 32-bit fields;
        # those fields are then written as 0xffffffff instead.
        extra = []
        if zinfo.file_size > ZIP64_LIMIT \
           or zinfo.compress_size > ZIP64_LIMIT:
            extra.append(zinfo.file_size)
            extra.append(zinfo.compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
        else:
            file_size = zinfo.file_size
            compress_size = zinfo.compress_size

        if zinfo.header_offset > ZIP64_LIMIT:
            extra.append(zinfo.header_offset)
            header_offset = 0xffffffff
        else:
            header_offset = zinfo.header_offset

        extra_data = zinfo.extra
        min_version = 0
        if extra:
            # Append a ZIP64 field to the extra's
            # NOTE(review): _strip_extra(..., (1,)) presumably drops any
            # existing field with header id 1 first -- confirm in helper.
            extra_data = _strip_extra(extra_data, (1,))
            # Field layout: header id 1, payload size, then one 64-bit
            # value per collected item.
            extra_data = struct.pack(
                '<HH' + 'Q'*len(extra),
                1, 8*len(extra), *extra) + extra_data

            min_version = ZIP64_VERSION

        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        # Never advertise a version lower than the features used need.
        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
        except DeprecationWarning:
            # Dump the values that were being packed to stderr, then let
            # the exception propagate.
            print((structCentralDir, stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset), file=sys.stderr)
            raise
        self.fp.write(centdir)
        self.fp.write(filename)
        self.fp.write(extra_data)
        self.fp.write(zinfo.comment)

    # Position just past the last central directory entry.
    pos2 = self.fp.tell()
    # Write end-of-zip-archive record
    centDirCount = len(self.filelist)
    centDirSize = pos2 - self.start_dir
    centDirOffset = self.start_dir
    # Name the first limit exceeded, if any; it is used in the error
    # message when ZIP64 is not allowed.
    requires_zip64 = None
    if centDirCount > ZIP_FILECOUNT_LIMIT:
        requires_zip64 = "Files count"
    elif centDirOffset > ZIP64_LIMIT:
        requires_zip64 = "Central directory offset"
    elif centDirSize > ZIP64_LIMIT:
        requires_zip64 = "Central directory size"
    if requires_zip64:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
        zip64endrec = struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, centDirCount, centDirCount,
            centDirSize, centDirOffset)
        self.fp.write(zip64endrec)

        # The locator records pos2, i.e. where the ZIP64 end record
        # was just written.
        zip64locrec = struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, pos2, 1)
        self.fp.write(zip64locrec)
        # Clamp the values so they fit the regular end record's fields.
        centDirCount = min(centDirCount, 0xFFFF)
        centDirSize = min(centDirSize, 0xFFFFFFFF)
        centDirOffset = min(centDirOffset, 0xFFFFFFFF)

    endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, centDirCount, centDirCount,
                         centDirSize, centDirOffset, len(self._comment))
    self.fp.write(endrec)
    self.fp.write(self._comment)
    self.fp.flush()
1918
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001919 def _fpclose(self, fp):
1920 assert self._fileRefCnt > 0
1921 self._fileRefCnt -= 1
1922 if not self._fileRefCnt and not self._filePassed:
1923 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001924
1925
1926class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001927 """Class to create ZIP archives with Python library files and packages."""
1928
def __init__(self, file, mode="r", compression=ZIP_STORED,
             allowZip64=True, optimize=-1):
    """Initialize the archive via ZipFile.__init__ and store 'optimize'.

    'file', 'mode', 'compression' and 'allowZip64' are forwarded to
    ZipFile.__init__; 'optimize' is kept as self._optimize.
    """
    base_options = {
        "mode": mode,
        "compression": compression,
        "allowZip64": allowZip64,
    }
    ZipFile.__init__(self, file, **base_options)
    self._optimize = optimize
1934
def writepy(self, pathname, basename="", filterfunc=None):
    """Add all files from "pathname" to the ZIP archive.

    If pathname is a package directory (it contains __init__.py), add
    the package and, recursively, all of its sub-packages; every *.py
    module found is entered under "<basename>/<package name>/".
    If pathname is a plain directory, enter the *.py modules it lists
    directly under basename, without recursing.
    Otherwise pathname must be a Python *.py file, and that single
    module is put into the archive.

    The file actually stored and its archive name are chosen by
    self._get_codename(), which compiles module.py when necessary.

    If filterfunc is given, it is called with the path of every
    candidate (pathname itself, then each *.py file found).  When it
    returns a false value, that file or directory is skipped.  The
    __init__.py of a package is added without consulting filterfunc.

    Raises RuntimeError if pathname is neither a directory nor a path
    ending in ".py".
    """
    pathname = os.fspath(pathname)
    if filterfunc and not filterfunc(pathname):
        if self.debug:
            label = 'path' if os.path.isdir(pathname) else 'file'
            print('%s %r skipped by filterfunc' % (label, pathname))
        return

    def add_module(source, prefix, label="Adding"):
        # source is a path ending in ".py"; _get_codename() expects the
        # path without that extension.
        fname, arcname = self._get_codename(source[:-3], prefix)
        if self.debug:
            print(label, arcname)
        self.write(fname, arcname)

    def add_filtered(source, prefix):
        # Same as add_module(), but honours filterfunc first.
        if filterfunc and not filterfunc(source):
            if self.debug:
                print('file %r skipped by filterfunc' % source)
            return
        add_module(source, prefix)

    if not os.path.isdir(pathname):
        if not pathname.endswith(".py"):
            raise RuntimeError(
                'Files added with writepy() must end with ".py"')
        add_module(pathname, basename, "Adding file")
        return

    initname = os.path.join(pathname, "__init__.py")
    if not os.path.isfile(initname):
        # This is NOT a package directory, add its files at top level
        if self.debug:
            print("Adding files from directory", pathname)
        for filename in sorted(os.listdir(pathname)):
            if os.path.splitext(filename)[1] == ".py":
                add_filtered(os.path.join(pathname, filename), basename)
        return

    # This is a package directory, add it
    head, name = os.path.split(pathname)
    if not name:
        # pathname ended with a separator ("pkg/"): the package name is
        # the last component of the head, otherwise the package would
        # be flattened to the top level of the archive.
        name = os.path.basename(head)
    basename = "%s/%s" % (basename, name) if basename else name
    if self.debug:
        print("Adding package in", pathname, "as", basename)
    add_module(initname, basename)

    # List the directory only after __init__ has been handled, as the
    # compile step may create entries in it.
    dirlist = sorted(os.listdir(pathname))
    dirlist.remove("__init__.py")
    # Add all *.py files and package subdirectories
    for filename in dirlist:
        path = os.path.join(pathname, filename)
        if os.path.isdir(path):
            if os.path.isfile(os.path.join(path, "__init__.py")):
                # This is a package directory, add it
                self.writepy(path, basename,
                             filterfunc=filterfunc)  # Recursive call
        elif os.path.splitext(filename)[1] == ".py":
            add_filtered(path, basename)
2016
def _get_codename(self, pathname, basename):
    """Return (filename, archivename) for the path.

    pathname is a module path without extension.  The result names the
    file on disk to store and the name to store it under, compiling
    the source first if no up-to-date compiled file is found.  For
    example, given /python/lib/string, return
    (/python/lib/string.pyc, string.pyc).  If compilation fails, the
    .py source itself is returned under its own name.  A non-empty
    basename is prepended to the archive name as "basename/".

    Raises ValueError if self._optimize is not one of -1, 0, 1, 2.
    """
    source = pathname + ".py"
    legacy_pyc = pathname + ".pyc"
    # __pycache__ locations for optimization levels 0, 1 and 2.
    caches = (
        importlib.util.cache_from_source(source, optimization=''),
        importlib.util.cache_from_source(source, optimization=1),
        importlib.util.cache_from_source(source, optimization=2),
    )

    def build(optimize=-1):
        # Compile the source; report a failure instead of raising.
        import py_compile
        if self.debug:
            print("Compiling", source)
        try:
            py_compile.compile(source, doraise=True, optimize=optimize)
        except py_compile.PyCompileError as err:
            print(err.msg)
            return False
        return True

    def up_to_date(candidate):
        # True when candidate exists and is at least as new as the source.
        return (os.path.isfile(candidate) and
                os.stat(candidate).st_mtime >= os.stat(source).st_mtime)

    level = self._optimize
    if level == -1:
        # legacy mode: use whatever file is present, preferring the
        # adjacent .pyc, then the __pycache__ files by optimization level.
        arcname = legacy_pyc
        for fname in (legacy_pyc,) + caches:
            if up_to_date(fname):
                break
        else:
            # Nothing usable on disk: compile py into a PEP 3147 pyc
            # file, which lands at the interpreter's own level.
            if build():
                running = sys.flags.optimize
                fname = caches[running] if running in (0, 1) else caches[2]
            else:
                fname = arcname = source
    else:
        # new mode: use given optimization level
        for candidate_level, fname in enumerate(caches):
            if level == candidate_level:
                break
        else:
            msg = "invalid value for 'optimize': {!r}".format(level)
            raise ValueError(msg)
        arcname = legacy_pyc
        if not up_to_date(fname) and not build(optimize=level):
            fname = arcname = source

    # Always store under the bare file name, optionally below basename.
    archivename = os.path.split(arcname)[1]
    if basename:
        archivename = "%s/%s" % (basename, archivename)
    return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002098
2099
def main(args=None):
    """Run the command-line interface of the zipfile module.

    args is the list of command-line arguments; None lets argparse
    read sys.argv.  Exactly one of the following modes is required:

      -l <zipfile>                  list the archive contents
      -e <zipfile> <output_dir>     extract the archive into a directory
      -c <name> <file> ...          create an archive from files/directories
      -t <zipfile>                  test the archive and report a bad member
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for zipfile module.')
    modes = parser.add_mutually_exclusive_group(required=True)
    modes.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    modes.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    modes.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    modes.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    options = parser.parse_args(args)

    if options.test is not None:
        with ZipFile(options.test, 'r') as archive:
            corrupted = archive.testzip()
        if corrupted:
            print("The following enclosed file is corrupted: {!r}".format(corrupted))
        print("Done testing")
        return

    if options.list is not None:
        with ZipFile(options.list, 'r') as archive:
            archive.printdir()
        return

    if options.extract is not None:
        archive_name, target_dir = options.extract
        with ZipFile(archive_name, 'r') as archive:
            archive.extractall(target_dir)
        return

    if options.create is None:
        return

    # First value is the archive to create, the rest are its sources.
    zip_name, *sources = options.create

    def add_to_zip(archive, path, zippath):
        # Files are stored deflated; directories get their own entry
        # (unless they map to the archive root) and are walked in
        # sorted order.  Anything else is ignored.
        if os.path.isfile(path):
            archive.write(path, zippath, ZIP_DEFLATED)
            return
        if not os.path.isdir(path):
            return
        if zippath:
            archive.write(path, zippath)
        for entry in sorted(os.listdir(path)):
            add_to_zip(archive,
                       os.path.join(path, entry),
                       os.path.join(zippath, entry))

    with ZipFile(zip_name, 'w') as archive:
        for path in sources:
            # A trailing separator leaves an empty basename; fall back
            # to the last real path component.
            zippath = (os.path.basename(path) or
                       os.path.basename(os.path.dirname(path)))
            if zippath in ('', os.curdir, os.pardir):
                zippath = ''
            add_to_zip(archive, path, zippath)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002159
# Run the command-line interface when the module is executed as a script.
if __name__ == "__main__":
    main()