blob: 2dc016472117992bc1b9413983ffea9ee6ca381e [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Brett Cannonb57a0852013-06-15 17:32:30 -04008import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import sys
10import time
11import stat
12import shutil
13import struct
14import binascii
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020015import threading
Guido van Rossum32abe6f2000-03-31 17:30:02 +000016
17try:
Tim Peterse1190062001-01-15 03:34:38 +000018 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000019 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040020except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000021 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000022 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000023
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020024try:
25 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040026except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020027 bz2 = None
28
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020029try:
30 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040031except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020032 lzma = None
33
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020034__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020035 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Georg Brandl4d540882010-10-28 06:42:33 +000036 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000037
class BadZipFile(Exception):
    """Raised by this module for archives it cannot process."""


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


# Pre-3.2 compatibility names
BadZipfile = BadZipFile
error = BadZipFile
49
Guido van Rossum32abe6f2000-03-31 17:30:02 +000050
# Largest values representable in the classic (non-ZIP64) header fields.
ZIP64_LIMIT = 0x7FFFFFFF            # (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 0xFFFF        # (1 << 16) - 1
ZIP_MAX_COMMENT = 0xFFFF            # (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "Version needed to extract" values written for each feature.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020068
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields inside the list built from the unpacked record.
(
    _ECD_SIGNATURE,
    _ECD_DISK_NUMBER,
    _ECD_DISK_START,
    _ECD_ENTRIES_THIS_DISK,
    _ECD_ENTRIES_TOTAL,
    _ECD_SIZE,
    _ECD_OFFSET,
    _ECD_COMMENT_SIZE,
    # These last two indices are not part of the structure as defined in the
    # spec, but they are used internally by this module as a convenience
    _ECD_COMMENT,
    _ECD_LOCATION,
) = range(10)
93
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(
    _CD_SIGNATURE,
    _CD_CREATE_VERSION,
    _CD_CREATE_SYSTEM,
    _CD_EXTRACT_VERSION,
    _CD_EXTRACT_SYSTEM,
    _CD_FLAG_BITS,
    _CD_COMPRESS_TYPE,
    _CD_TIME,
    _CD_DATE,
    _CD_CRC,
    _CD_COMPRESSED_SIZE,
    _CD_UNCOMPRESSED_SIZE,
    _CD_FILENAME_LENGTH,
    _CD_EXTRA_FIELD_LENGTH,
    _CD_COMMENT_LENGTH,
    _CD_DISK_NUMBER_START,
    _CD_INTERNAL_FILE_ATTRIBUTES,
    _CD_EXTERNAL_FILE_ATTRIBUTES,
    _CD_LOCAL_HEADER_OFFSET,
) = range(19)
120
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields in an unpacked local file header.
(
    _FH_SIGNATURE,
    _FH_EXTRACT_VERSION,
    _FH_EXTRACT_SYSTEM,
    _FH_GENERAL_PURPOSE_FLAG_BITS,
    _FH_COMPRESSION_METHOD,
    _FH_LAST_MOD_TIME,
    _FH_LAST_MOD_DATE,
    _FH_CRC,
    _FH_COMPRESSED_SIZE,
    _FH_UNCOMPRESSED_SIZE,
    _FH_FILENAME_LENGTH,
    _FH_EXTRA_FIELD_LENGTH,
) = range(12)
139
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000144
# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields in an unpacked ZIP64 end-of-central-directory record.
(
    _CD64_SIGNATURE,
    _CD64_DIRECTORY_RECSIZE,
    _CD64_CREATE_VERSION,
    _CD64_EXTRACT_VERSION,
    _CD64_DISK_NUMBER,
    _CD64_DISK_NUMBER_START,
    _CD64_NUMBER_ENTRIES_THIS_DISK,
    _CD64_NUMBER_ENTRIES_TOTAL,
    _CD64_DIRECTORY_SIZE,
    _CD64_OFFSET_START_CENTDIR,
) = range(10)
161
# The bytes b"PK\x07\x08" read as a little-endian 32-bit integer.
_DD_SIGNATURE = int.from_bytes(b"PK\x07\x08", "little")
163
# Header of one extra-field record: 16-bit ID followed by 16-bit payload length.
_EXTRA_FIELD_STRUCT = struct.Struct('<HH')

def _strip_extra(extra, xids):
    """Return *extra* with every extra field whose ID is in *xids* removed.

    extra -- bytes of a ZIP "extra" area: a sequence of records, each made
             of a 4-byte header (ID, payload length) followed by the payload.
    xids  -- container of integer field IDs to drop.

    When nothing matches, the original object is returned unchanged.  All
    bytes that do not belong to a removed record are preserved, including
    records (or unparsable trailing bytes) after the last removed one.
    """
    unpack = _EXTRA_FIELD_STRUCT.unpack
    modified = False
    buffer = []
    start = i = 0           # start: beginning of the pending run of kept bytes
    while i + 4 <= len(extra):
        xid, xlen = unpack(extra[i : i + 4])
        j = i + 4 + xlen    # offset just past this record
        if xid in xids:
            if i != start:
                buffer.append(extra[start : i])
            start = j
            modified = True
        i = j
    if not modified:
        return extra
    # Flush the run that follows the last removed record; without this the
    # fields located after a stripped one would be lost.
    if start < len(extra):
        buffer.append(extra[start:])
    return b''.join(buffer)
184
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An OSError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        endrec = _EndRecData(fp)
    except OSError:
        return False
    # A non-empty record means the file has the correct magic number.
    return bool(endrec)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000192
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    Any OSError raised while opening or probing the file yields False.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and make sure it gets closed again.
            with open(filename, "rb") as fp:
                verdict = _check_zipfile(fp)
        else:
            verdict = _check_zipfile(fp=filename)
    except OSError:
        pass
    return verdict
208
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin   -- seekable binary file object.
    offset -- position of the classic end-of-central-directory record,
              relative to the end of the file (used with whence=2).
    endrec -- list describing the classic record; it is updated in place
              and returned.  It is returned untouched when no ZIP64 locator
              or record is found at the expected place.

    Raises BadZipFile for archives spanning several disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, _reloff, disks = struct.unpack(structEndArchive64Locator,
                                                  locator)
    if magic != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks == 1):
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    (magic, _recsize, _create_version, _read_version, disk_num, disk_dir,
     entries_here, entries_total, dir_size, dir_offset) = \
        struct.unpack(structEndArchive64, record)
    if magic != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    replacements = (
        (_ECD_SIGNATURE, magic),
        (_ECD_DISK_NUMBER, disk_num),
        (_ECD_DISK_START, disk_dir),
        (_ECD_ENTRIES_THIS_DISK, entries_here),
        (_ECD_ENTRIES_TOTAL, entries_total),
        (_ECD_SIZE, dir_size),
        (_ECD_OFFSET, dir_offset),
    )
    for index, value in replacements:
        endrec[index] = value
    return endrec
250
251
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    has_bare_record = (len(tail) == sizeEndCentDir
                       and tail[0:4] == stringEndArchive
                       and tail[-2:] == b"\000\000")
    if has_bare_record:
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, tail))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    window = fpin.read()
    start = window.rfind(stringEndArchive)
    if start < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    record_end = start + sizeEndCentDir
    record = window[start:record_end]
    if len(record) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, record))
    comment_len = endrec[_ECD_COMMENT_SIZE]    # as claimed by the zip file
    endrec.append(window[record_end:record_end + comment_len])
    endrec.append(maxCommentStart + start)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, maxCommentStart + start - filesize,
                         endrec)
Serhiy Storchakad2b15272013-01-31 15:27:07 +0200310 return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000311
Fred Drake484d7352000-10-02 21:14:52 +0000312
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename  -- member name; truncated at the first NUL character and
                     with os.sep replaced by "/".
        date_time -- (year, month, day, hour, min, sec) tuple.

        Raises ValueError when the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Return a short description listing only the non-default details."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # High 16 bits: mode bits as stored by from_file(); low 16 bits: rest.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        zip64 -- True to always emit the ZIP64 extra record, False to forbid
                 it, None (default) to emit it only when a size exceeds
                 ZIP64_LIMIT.

        Raises LargeZipFile when a size exceeds ZIP64_LIMIT and zip64 is
        false.  As a side effect, extract_version and create_version are
        raised to the minimum required by the features in use.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        ASCII is used when possible; otherwise the name is encoded as UTF-8
        and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 record (ID 0x0001) found in self.extra, if any.

        file_size, compress_size and header_offset are replaced, in that
        order, by the 64-bit values of the record whenever they currently
        hold the 0xffffffff sentinel.

        Raises BadZipFile when a record overruns the extra area, has an
        unexpected size, or holds fewer values than the sentinels require.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        self.file_size = counts[idx]
                        idx += 1

                    if self.compress_size == 0xFFFFFFFF:
                        self.compress_size = counts[idx]
                        idx += 1

                    if self.header_offset == 0xffffffff:
                        self.header_offset = counts[idx]
                        idx += 1
                except IndexError:
                    # The record is shorter than the sentinels demand:
                    # report a corrupt archive rather than a bare IndexError.
                    raise BadZipFile(
                        "Corrupt extra field %04x (size=%d)" % (tp, ln)) from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).

        When strict_timestamps is false, modification years before 1980 or
        after 2107 are replaced by 1980-01-01 00:00:00 and
        2107-12-31 23:59:59 respectively.
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        if not strict_timestamps and date_time[0] < 1980:
            date_time = (1980, 1, 1, 0, 0, 0)
        elif not strict_timestamps and date_time[0] > 2107:
            date_time = (2107, 12, 31, 23, 59, 59)
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        # endswith() also copes with an empty name, where [-1] would raise.
        return self.filename.endswith('/')
535
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000536
# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
# internal keys. We noticed that a direct implementation is faster than
# relying on binascii.crc32().

# Lazily built 256-entry lookup table (filled in by _ZipDecrypter).
_crctable = None

def _gen_crc(crc):
    """Return *crc* after eight shift/conditional-XOR rounds (0xEDB88320)."""
    for _ in range(8):
        low_bit = crc & 1
        crc >>= 1
        if low_bit:
            crc ^= 0xEDB88320
    return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000549
# ZIP supports a password-based form of encryption. Even though known
# plaintext attacks have been found against it, it is still useful
# to be able to get data out of such a file.
#
# Usage:
#     zd = _ZipDecrypter(mypwd)
#     plain_bytes = zd(cypher_bytes)

def _ZipDecrypter(pwd):
    """Return a stateful function decrypting successive chunks of bytes.

    pwd is iterated and each item is mixed into the three internal keys as
    an integer.  The returned callable takes an iterable of integers
    (e.g. a bytes object) and returns the decrypted bytes; the keys keep
    evolving from one call to the next.
    """
    key0 = 0x12345678
    key1 = 0x23456789
    key2 = 0x34567890

    # Build the shared lookup table on first use.
    global _crctable
    if _crctable is None:
        _crctable = [_gen_crc(value) for value in range(256)]
    table = _crctable

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ table[(crc ^ ch) & 0xFF]

    def update_keys(c):
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        key1 = ((key1 + (key0 & 0xFF)) & 0xFFFFFFFF) * 0x08088405 + 1
        key1 &= 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    # Prime the keys with the password.
    for item in pwd:
        update_keys(item)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        for byte in data:
            mask = key2 | 2
            byte ^= ((mask * (mask ^ 1)) >> 8) & 0xFF
            update_keys(byte)
            plain.append(byte)
        return bytes(plain)

    return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000594
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200595
class LZMACompressor:
    """Raw LZMA1 compressor whose output starts with a small header.

    The header is struct.pack('<BBH', 9, 4, len(props)) followed by the
    encoded filter properties; it is emitted by the first call to
    compress() or flush().
    """

    def __init__(self):
        # The underlying compressor is created lazily by _init().
        self._comp = None

    def _init(self):
        """Create the raw compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*, prefixed by the header on the first call."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream, emitting the header first if still pending."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.flush()
617
618
class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by a properties header.

    The header is 4 bytes (the last two being the little-endian size of the
    filter properties) followed by the properties themselves.  Input is
    buffered until the header and at least one more byte are available.
    """

    def __init__(self):
        self._decomp = None         # created once the header is complete
        self._unconsumed = b''      # bytes buffered while waiting for it
        self.eof = False

    def decompress(self, data):
        """Feed *data* and return the bytes decompressed so far from it."""
        if self._decomp is None:
            pending = self._unconsumed + data
            self._unconsumed = pending
            if len(pending) <= 4:
                return b''
            psize, = struct.unpack('<H', pending[2:4])
            body_start = 4 + psize
            if len(pending) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[body_start:]
            # The buffer is no longer needed once the header is parsed.
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
645
646
# Names shown for ZIP compression method numbers (used in messages and repr).
compressor_names = dict((
    (0, 'store'),
    (1, 'shrink'),
    (2, 'reduce'),
    (3, 'reduce'),
    (4, 'reduce'),
    (5, 'reduce'),
    (6, 'implode'),
    (7, 'tokenize'),
    (8, 'deflate'),
    (9, 'deflate64'),
    (10, 'implode'),
    (12, 'bzip2'),
    (14, 'lzma'),
    (18, 'terse'),
    (19, 'lz77'),
    (97, 'wavpack'),
    (98, 'ppmd'),
))
666
def _check_compression(compression):
    """Validate that *compression* can be used in this interpreter.

    Raises RuntimeError when the module implementing the method could not
    be imported, and NotImplementedError for any other method number.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        backend = zlib
        complaint = "Compression requires the (missing) zlib module"
    elif compression == ZIP_BZIP2:
        backend = bz2
        complaint = "Compression requires the (missing) bz2 module"
    elif compression == ZIP_LZMA:
        backend = lzma
        complaint = "Compression requires the (missing) lzma module"
    else:
        raise NotImplementedError("That compression method is not supported")
    if not backend:
        raise RuntimeError(complaint)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200684
685
def _get_compressor(compress_type, compresslevel=None):
    """Return a fresh compressor object for *compress_type*.

    *compresslevel* is forwarded to zlib/bz2 when it is not None and is
    ignored for ZIP_LZMA.  Returns None for any other compression type.
    """
    if compress_type == ZIP_DEFLATED:
        level = (zlib.Z_DEFAULT_COMPRESSION if compresslevel is None
                 else compresslevel)
        # Negative wbits selects a raw deflate stream.
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()
    return None
700
701
def _get_decompressor(compress_type):
    """Return a fresh decompressor object for *compress_type*.

    Returns None for ZIP_STORED.  Raises NotImplementedError for any
    method that is not handled here; the message includes the method's
    name when compressor_names knows it.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # Negative wbits selects a raw deflate stream.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    method_name = compressor_names.get(compress_type)
    if not method_name:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError("compression type %d (%s)" % (compress_type, method_name))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200717
718
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200719class _SharedFile:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300720 def __init__(self, file, pos, close, lock, writing):
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200721 self._file = file
722 self._pos = pos
723 self._close = close
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200724 self._lock = lock
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300725 self._writing = writing
John Jolly066df4f2018-01-30 01:51:35 -0700726 self.seekable = file.seekable
727 self.tell = file.tell
728
729 def seek(self, offset, whence=0):
730 with self._lock:
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200731 if self._writing():
John Jolly066df4f2018-01-30 01:51:35 -0700732 raise ValueError("Can't reposition in the ZIP file while "
733 "there is an open writing handle on it. "
734 "Close the writing handle before trying to read.")
Mickaël Schoentgen3f8c6912018-07-29 20:26:52 +0200735 self._file.seek(offset, whence)
John Jolly066df4f2018-01-30 01:51:35 -0700736 self._pos = self._file.tell()
737 return self._pos
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200738
739 def read(self, n=-1):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200740 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300741 if self._writing():
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300742 raise ValueError("Can't read from the ZIP file while there "
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300743 "is an open writing handle on it. "
744 "Close the writing handle before trying to read.")
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200745 self._file.seek(self._pos)
746 data = self._file.read(n)
747 self._pos = self._file.tell()
748 return data
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200749
750 def close(self):
751 if self._file is not None:
752 fileobj = self._file
753 self._file = None
754 self._close(fileobj)
755
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200756# Provide the tell method for unseekable stream
757class _Tellable:
758 def __init__(self, fp):
759 self.fp = fp
760 self.offset = 0
761
762 def write(self, data):
763 n = self.fp.write(data)
764 self.offset += n
765 return n
766
767 def tell(self):
768 return self.offset
769
770 def flush(self):
771 self.fp.flush()
772
773 def close(self):
774 self.fp.close()
775
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200776
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # Parenthesised on purpose: "1 << 31 - 1" parses as "1 << 30" because
    # binary minus binds tighter than the shift operator.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Set up reading of one member.

        fileobj:       object positioned at the start of the member's data.
        mode:          stored as the ``mode`` attribute.
        zipinfo:       supplies compress_type, compress_size, file_size,
                       filename and (optionally) CRC.
        decrypter:     optional callable applied to each raw chunk read.
        close_fileobj: if true, close() also closes *fileobj*.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size   # raw bytes still to read
        self._left = zipinfo.file_size                # decoded bytes still to return

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0          # read position inside _readbuffer

        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        # Seeking is enabled only when the underlying object reports itself
        # seekable; the starting state is saved so seek() can rewind.
        # NOTE(review): when zipinfo has no CRC, _running_crc is unset, so
        # the AttributeError below also leaves the member non-seekable.
        self._seekable = False
        try:
            if fileobj.seekable():
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Put the bytes just read back in front of the buffered data.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than requested: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            # Loop only until some data arrives (or EOF).
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the member's declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read (and decrypt) a raw chunk of at least MIN_READ_SIZE bytes,
        # capped at the raw bytes remaining for this member.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        return self._seekable

    def seek(self, offset, whence=0):
        """Move to a position in the decoded data and return it.

        The target is clamped to [0, file size].  Moving backwards past
        the buffered data restarts decoding from the member's start;
        moving forwards reads and discards data.  Raises
        io.UnsupportedOperation if the underlying stream is not seekable
        and ValueError for an unknown *whence*.
        """
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos

        # Skip forward by reading and discarding in bounded chunks.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        """Return the current position in the decoded data."""
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos
1073
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001074
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001075class _ZipWriteFile(io.BufferedIOBase):
1076 def __init__(self, zf, zinfo, zip64):
1077 self._zinfo = zinfo
1078 self._zip64 = zip64
1079 self._zipfile = zf
Bo Baylesce237c72018-01-29 23:54:07 -06001080 self._compressor = _get_compressor(zinfo.compress_type,
1081 zinfo._compresslevel)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001082 self._file_size = 0
1083 self._compress_size = 0
1084 self._crc = 0
1085
1086 @property
1087 def _fileobj(self):
1088 return self._zipfile.fp
1089
1090 def writable(self):
1091 return True
1092
1093 def write(self, data):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001094 if self.closed:
1095 raise ValueError('I/O operation on closed file.')
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001096 nbytes = len(data)
1097 self._file_size += nbytes
1098 self._crc = crc32(data, self._crc)
1099 if self._compressor:
1100 data = self._compressor.compress(data)
1101 self._compress_size += len(data)
1102 self._fileobj.write(data)
1103 return nbytes
1104
1105 def close(self):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001106 if self.closed:
1107 return
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001108 try:
1109 super().close()
1110 # Flush any data from the compressor, and update header info
1111 if self._compressor:
1112 buf = self._compressor.flush()
1113 self._compress_size += len(buf)
1114 self._fileobj.write(buf)
1115 self._zinfo.compress_size = self._compress_size
1116 else:
1117 self._zinfo.compress_size = self._file_size
1118 self._zinfo.CRC = self._crc
1119 self._zinfo.file_size = self._file_size
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001120
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001121 # Write updated header info
1122 if self._zinfo.flag_bits & 0x08:
1123 # Write CRC and file sizes after the file data
1124 fmt = '<LLQQ' if self._zip64 else '<LLLL'
1125 self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
1126 self._zinfo.compress_size, self._zinfo.file_size))
1127 self._zipfile.start_dir = self._fileobj.tell()
1128 else:
1129 if not self._zip64:
1130 if self._file_size > ZIP64_LIMIT:
1131 raise RuntimeError(
1132 'File size unexpectedly exceeded ZIP64 limit')
1133 if self._compress_size > ZIP64_LIMIT:
1134 raise RuntimeError(
1135 'Compressed size unexpectedly exceeded ZIP64 limit')
1136 # Seek backwards and write file header (which will now include
1137 # correct CRC and file sizes)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001138
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001139 # Preserve current position in file
1140 self._zipfile.start_dir = self._fileobj.tell()
1141 self._fileobj.seek(self._zinfo.header_offset)
1142 self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1143 self._fileobj.seek(self._zipfile.start_dir)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001144
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001145 # Successfully written: Add file to our caches
1146 self._zipfile.filelist.append(self._zinfo)
1147 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1148 finally:
1149 self._zipfile._writing = False
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001150
Serhiy Storchaka2524fde2019-03-30 08:25:19 +02001151
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001152
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001153class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001154 """ Class with methods to open, read, write, close, list zip files.
1155
Bo Baylesce237c72018-01-29 23:54:07 -06001156 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1157 compresslevel=None)
Tim Petersa19a1682001-03-29 04:36:09 +00001158
Fred Drake3d9091e2001-03-26 15:49:24 +00001159 file: Either the path to the file, or a file-like object.
1160 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001161 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1162 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001163 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1164 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001165 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1166 needed, otherwise it will raise an exception when this would
1167 be necessary.
Bo Baylesce237c72018-01-29 23:54:07 -06001168 compresslevel: None (default for the given compression type) or an integer
1169 specifying the level to pass to the compressor.
1170 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1171 When using ZIP_DEFLATED integers 0 through 9 are accepted.
1172 When using ZIP_BZIP2 integers 1 through 9 are accepted.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001173
Fred Drake3d9091e2001-03-26 15:49:24 +00001174 """
Fred Drake484d7352000-10-02 21:14:52 +00001175
Fred Drake90eac282001-02-28 05:29:34 +00001176 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001177 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001178
    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None, *, strict_timestamps=True):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'.

        file: a path (str or os.PathLike), which is opened here, or a
            file-like object, which is used as given.
        compression: method checked by _check_compression() and stored as
            the ``compression`` attribute.
        allowZip64, compresslevel, strict_timestamps: stored on the
            instance; not otherwise used in this method.

        Raises ValueError for an unknown mode.  If reading or preparing
        the archive fails, the file is released via _fpclose() and the
        exception is re-raised.
        """
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''
        self._strict_timestamps = strict_timestamps

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            # Maps the archive mode to the first open() mode to try, and
            # each open() mode to the fallback tried if it fails with
            # OSError (e.g. 'a': 'r+b' -> 'w+b' -> 'wb').
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    # Retry with the next fallback; give up (re-raise) when
                    # the failed mode has no fallback entry.
                    if filemode in modeDict:
                        filemode = modeDict[filemode]
                        continue
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    # No usable tell(): wrap the stream so the position is
                    # tracked by counting written bytes.
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            # Deliberately catches everything: release the file, then
            # re-raise the original exception unchanged.
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001268
Ezio Melottifaa6b7f2009-12-30 12:34:59 +00001269 def __enter__(self):
1270 return self
1271
1272 def __exit__(self, type, value, traceback):
1273 self.close()
1274
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001275 def __repr__(self):
1276 result = ['<%s.%s' % (self.__class__.__module__,
1277 self.__class__.__qualname__)]
1278 if self.fp is not None:
1279 if self._filePassed:
1280 result.append(' file=%r' % self.fp)
1281 elif self.filename is not None:
1282 result.append(' filename=%r' % self.filename)
1283 result.append(' mode=%r' % self.mode)
1284 else:
1285 result.append(' [closed]')
1286 result.append('>')
1287 return ''.join(result)
1288
Tim Peters7d3bad62001-04-04 18:56:49 +00001289 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +00001290 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001291 fp = self.fp
Georg Brandl268e4d42010-10-14 06:59:45 +00001292 try:
1293 endrec = _EndRecData(fp)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001294 except OSError:
Georg Brandl4d540882010-10-28 06:42:33 +00001295 raise BadZipFile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001296 if not endrec:
Georg Brandl4d540882010-10-28 06:42:33 +00001297 raise BadZipFile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001298 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001299 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001300 size_cd = endrec[_ECD_SIZE] # bytes in central directory
1301 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
R David Murrayf50b38a2012-04-12 18:44:58 -04001302 self._comment = endrec[_ECD_COMMENT] # archive comment
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001303
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001304 # "concat" is zero, unless zip was concatenated to another file
1305 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +00001306 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1307 # If Zip64 extension structures are present, account for them
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001308 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001309
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001310 if self.debug > 2:
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001311 inferred = concat + offset_cd
1312 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001313 # self.start_dir: Position of start of central directory
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001314 self.start_dir = offset_cd + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001315 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001316 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001317 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001318 total = 0
1319 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001320 centdir = fp.read(sizeCentralDir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001321 if len(centdir) != sizeCentralDir:
1322 raise BadZipFile("Truncated central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001323 centdir = struct.unpack(structCentralDir, centdir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001324 if centdir[_CD_SIGNATURE] != stringCentralDir:
1325 raise BadZipFile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001326 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001327 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +00001328 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001329 flags = centdir[5]
1330 if flags & 0x800:
1331 # UTF-8 file names extension
1332 filename = filename.decode('utf-8')
1333 else:
1334 # Historical ZIP filename encoding
1335 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001336 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001337 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +00001338 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1339 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001340 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001341 (x.create_version, x.create_system, x.extract_version, x.reserved,
Christian Tismer59202e52013-10-21 03:59:23 +02001342 x.flag_bits, x.compress_type, t, d,
1343 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
Martin v. Löwisd099b562012-05-01 14:08:22 +02001344 if x.extract_version > MAX_EXTRACT_VERSION:
1345 raise NotImplementedError("zip file version %.1f" %
1346 (x.extract_version / 10))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001347 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1348 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +00001349 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001350 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Christian Tismer59202e52013-10-21 03:59:23 +02001351 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001352
1353 x._decodeExtra()
Serhiy Storchaka3763ea82017-05-06 14:46:01 +03001354 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001355 self.filelist.append(x)
1356 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001357
1358 # update total bytes read from central directory
1359 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1360 + centdir[_CD_EXTRA_FIELD_LENGTH]
1361 + centdir[_CD_COMMENT_LENGTH])
1362
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001363 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001364 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001365
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001366
def namelist(self):
    """Return the names of all archive members as a new list."""
    names = []
    for entry in self.filelist:
        names.append(entry.filename)
    return names
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001370
def infolist(self):
    """Return the list of ZipInfo instances for the archive members.

    The archive's own list object is returned, not a copy.
    """
    members = self.filelist
    return members
1375
def printdir(self, file=None):
    """Print a table of contents for the archive.

    A header line is printed first, then one line per member with its
    name, modification time and uncompressed size.  'file' is passed to
    print() unchanged.
    """
    header = ("File Name", "Modified ", "Size")
    print("%-46s %19s %12s" % header, file=file)
    for member in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % member.date_time[:6]
        row = (member.filename, stamp, member.file_size)
        print("%-46s %s %12d" % row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001384
1385 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001386 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001387 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001388 for zinfo in self.filelist:
1389 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001390 # Read by chunks, to avoid an OverflowError or a
1391 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001392 with self.open(zinfo.filename, "r") as f:
1393 while f.read(chunk_size): # Check CRC-32
1394 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001395 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001396 return zinfo.filename
1397
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError if the archive has no member with that name.
    """
    entry = self.NameToInfo.get(name)
    if entry is not None:
        return entry

    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001406
def setpassword(self, pwd):
    """Set the default password used for encrypted members.

    A truthy 'pwd' must be a bytes object, otherwise TypeError is raised.
    Any falsy value resets the default password to None.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001415
@property
def comment(self):
    """The archive comment, as a bytes object."""
    return self._comment

@comment.setter
def comment(self, comment):
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
    limit = ZIP_MAX_COMMENT
    if len(comment) > limit:
        # An over-long comment is cut to the limit, with a warning,
        # rather than being rejected.
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % limit, stacklevel=2)
        comment = comment[:limit]
    self._comment = comment
    # Setting the comment always marks the archive as modified.
    self._didModify = True
1433
def read(self, name, pwd=None):
    """Return the whole content of member 'name' as bytes.

    'pwd' is forwarded to open() together with mode "r".
    """
    with self.open(name, "r", pwd) as member:
        payload = member.read()
    return payload
Guido van Rossumd8faa362007-04-27 19:54:29 +00001438
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Return a file-like object for the archive member 'name'.

    name is either the member's file name inside the ZIP file or a
    ZipInfo object.

    mode is 'r' to read a member that is already in the archive, or 'w'
    to write a member that is newly added to it.

    pwd is the password used to decrypt the member; it must be bytes and
    is only accepted when reading.  Without it, the archive's default
    password is used for encrypted members.

    force_zip64 matters only for writing: when the size is not known in
    advance but may exceed 2 GiB, pass it to use the ZIP64 format.  If
    the size is known in advance, it is best to pass a ZipInfo instance
    for name, with zinfo.file_size set.
    """
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    if pwd:
        if not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if mode == "w":
            raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    writing = (mode == 'w')

    # Resolve 'name' to a ZipInfo object.
    if isinstance(name, ZipInfo):
        zinfo = name
    elif writing:
        # New member: inherit the archive's compression settings.
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
    else:
        zinfo = self.getinfo(name)

    if writing:
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                         "is an open writing handle on it. "
                         "Close the writing handle before trying to read.")

    # Reading: take one more reference on the shared underlying file.
    self._fileRefCnt += 1
    shared = _SharedFile(self.fp, zinfo.header_offset,
                         self._fpclose, self._lock, lambda: self._writing)
    try:
        # Read and validate the fixed-size local file header.
        raw_header = shared.read(sizeFileHeader)
        if len(raw_header) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fields = struct.unpack(structFileHeader, raw_header)
        if fields[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        raw_name = shared.read(fields[_FH_FILENAME_LENGTH])
        extra_length = fields[_FH_EXTRA_FIELD_LENGTH]
        if extra_length:
            # The extra field is read only to skip over it.
            shared.read(extra_length)

        flags = zinfo.flag_bits
        if flags & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")
        if flags & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        # Flag bit 11 marks a UTF-8 file name; otherwise cp437 is used.
        codec = "utf-8" if flags & 0x800 else "cp437"
        header_name = raw_name.decode(codec)
        if header_name != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, raw_name))

        # Flag bit 0 marks an encrypted member, which needs a password.
        decrypter = None
        if flags & 0x1:
            pwd = pwd or self.pwd
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)

            decrypter = _ZipDecrypter(pwd)
            # The first 12 bytes of the cypher stream are an encryption
            # header used to strengthen the algorithm.  The first 11
            # bytes are completely random, while the 12th contains the
            # MSB of the CRC, or the MSB of the file time depending on
            # the header type, and is used to check the password.
            crypt_header = shared.read(12)
            plain_header = decrypter(crypt_header[0:12])
            if flags & 0x8:
                # compare against the file time from extended local headers
                expected = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                expected = (zinfo.CRC >> 24) & 0xff
            if plain_header[11] != expected:
                raise RuntimeError("Bad password for file %r" % name)

        return ZipExtFile(shared, mode, zinfo, decrypter, True)
    except BaseException:
        # Whatever went wrong, give the shared file reference back
        # before propagating the error.
        shared.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001552
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001553 def _open_to_write(self, zinfo, force_zip64=False):
1554 if force_zip64 and not self._allowZip64:
1555 raise ValueError(
1556 "force_zip64 is True, but allowZip64 was False when opening "
1557 "the ZIP file."
1558 )
1559 if self._writing:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001560 raise ValueError("Can't write to the ZIP file while there is "
1561 "another write handle open on it. "
1562 "Close the first handle before opening another.")
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001563
1564 # Sizes and CRC are overwritten with correct data after processing the file
1565 if not hasattr(zinfo, 'file_size'):
1566 zinfo.file_size = 0
1567 zinfo.compress_size = 0
1568 zinfo.CRC = 0
1569
1570 zinfo.flag_bits = 0x00
1571 if zinfo.compress_type == ZIP_LZMA:
1572 # Compressed data includes an end-of-stream (EOS) marker
1573 zinfo.flag_bits |= 0x02
1574 if not self._seekable:
1575 zinfo.flag_bits |= 0x08
1576
1577 if not zinfo.external_attr:
1578 zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
1579
1580 # Compressed size can be larger than uncompressed size
1581 zip64 = self._allowZip64 and \
1582 (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1583
1584 if self._seekable:
1585 self.fp.seek(self.start_dir)
1586 zinfo.header_offset = self.fp.tell()
1587
1588 self._writecheck(zinfo)
1589 self._didModify = True
1590
1591 self.fp.write(zinfo.FileHeader(zip64))
1592
1593 self._writing = True
1594 return _ZipWriteFile(self, zinfo, zip64)
1595
def extract(self, member, path=None, pwd=None):
    """Extract one member from the archive and return the created path.

    'member' may be a file name or a ZipInfo object.  The member is
    extracted under its full name below 'path'; without 'path' the
    current working directory is used.  'pwd' is passed on for
    encrypted members.
    """
    destination = os.getcwd() if path is None else os.fspath(path)
    return self._extract_member(member, destination, pwd)
1608
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members from the archive.

    'path' is the directory to extract to; without it the current
    working directory is used.  'members' is optional and must be a
    subset of the list returned by namelist(); without it every member
    is extracted.  'pwd' is passed on for encrypted members.
    """
    if members is None:
        members = self.namelist()

    destination = os.getcwd() if path is None else os.fspath(path)

    for entry in members:
        self._extract_member(entry, destination, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001625
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001626 @classmethod
1627 def _sanitize_windows_name(cls, arcname, pathsep):
1628 """Replace bad characters and remove trailing dots from parts."""
1629 table = cls._windows_illegal_name_trans_table
1630 if not table:
1631 illegal = ':<>|"?*'
1632 table = str.maketrans(illegal, '_' * len(illegal))
1633 cls._windows_illegal_name_trans_table = table
1634 arcname = arcname.translate(table)
1635 # remove trailing dots
1636 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1637 # rejoin, removing empty parts.
1638 arcname = pathsep.join(x for x in arcname if x)
1639 return arcname
1640
def _extract_member(self, member, targetpath, pwd):
    """Extract 'member' below the directory 'targetpath'.

    'member' is a ZipInfo object or a member name, which is looked up
    with getinfo().  The member's archive name is made relative and
    stripped of drive letters, empty components, "." and ".." before it
    is joined to 'targetpath'.  'pwd' is passed to open() for encrypted
    members.

    Returns the normalized path of the created file or directory.
    Directory creation tolerates the directory being created
    concurrently by somebody else.
    """
    if not isinstance(member, ZipInfo):
        member = self.getinfo(member)

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    invalid_path_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                               if x not in invalid_path_parts)
    if os.path.sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, os.path.sep)

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.  exist_ok covers the
    # window between the existence check and the creation, during which
    # another thread or process may create the same directories.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs, exist_ok=True)

    if member.is_dir():
        if not os.path.isdir(targetpath):
            try:
                os.mkdir(targetpath)
            except FileExistsError:
                # Only an error if what appeared in the meantime is not
                # the directory we wanted.
                if not os.path.isdir(targetpath):
                    raise
        return targetpath

    with self.open(member, pwd=pwd) as source, \
         open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1682
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001683 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001684 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001685 if zinfo.filename in self.NameToInfo:
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001686 import warnings
1687 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001688 if self.mode not in ('w', 'x', 'a'):
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001689 raise ValueError("write() requires mode 'w', 'x', or 'a'")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001690 if not self.fp:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001691 raise ValueError(
Christian Tismer59202e52013-10-21 03:59:23 +02001692 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001693 _check_compression(zinfo.compress_type)
Serhiy Storchakacfbb3942014-09-23 21:34:24 +03001694 if not self._allowZip64:
1695 requires_zip64 = None
1696 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1697 requires_zip64 = "Files count"
1698 elif zinfo.file_size > ZIP64_LIMIT:
1699 requires_zip64 = "Filesize"
1700 elif zinfo.header_offset > ZIP64_LIMIT:
1701 requires_zip64 = "Zipfile size"
1702 if requires_zip64:
1703 raise LargeZipFile(requires_zip64 +
1704 " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001705
def write(self, filename, arcname=None,
          compress_type=None, compresslevel=None):
    """Add the file 'filename' to the archive under the name 'arcname'.

    For a regular file, 'compress_type' and 'compresslevel' override the
    archive's defaults when they are not None, and the file's bytes are
    copied into the archive.  For a directory only a header entry is
    written.  Raises ValueError if the archive is closed or a writing
    handle is still open.
    """
    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname,
                              strict_timestamps=self._strict_timestamps)

    if not zinfo.is_dir():
        zinfo.compress_type = (self.compression if compress_type is None
                               else compress_type)
        zinfo._compresslevel = (self.compresslevel if compresslevel is None
                                else compresslevel)
        with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
            shutil.copyfileobj(src, dest, 1024*8)
        return

    # Directory entry: there is no data, just the header.
    zinfo.compress_size = 0
    zinfo.CRC = 0
    with self._lock:
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        # The central directory now starts after the new header.
        self.start_dir = self.fp.tell()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001754
def writestr(self, zinfo_or_arcname, data,
             compress_type=None, compresslevel=None):
    """Write 'data' into the archive as one member.

    'data' may be either a 'str' or a 'bytes' instance; if it is a
    'str', it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or the name of the
    file in the archive.  For a name, a new ZipInfo stamped with the
    current local time is created, using the archive's compression
    settings; a name ending in '/' gets directory attributes.
    'compress_type' and 'compresslevel' override the member's settings
    when they are not None.

    Raises ValueError if the archive is closed or a writing handle is
    still open.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
        # endswith() rather than [-1]: indexing an empty name would
        # raise IndexError.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    if compresslevel is not None:
        zinfo._compresslevel = compresslevel

    zinfo.file_size = len(data)            # Uncompressed size
    with self._lock:
        with self.open(zinfo, mode='w') as dest:
            dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001795
1796 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001797 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001798 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001799
def close(self):
    """Close the archive file.

    For mode 'w', 'x' and 'a' the ending records are written first if
    the archive was modified.  Closing an already closed archive does
    nothing; closing while a writing handle is open raises ValueError.
    """
    if self.fp is None:
        return

    if self._writing:
        raise ValueError("Can't close the ZIP file while there is "
                         "an open writing handle on it. "
                         "Close the writing handle before closing the zip.")

    try:
        needs_end_record = self.mode in ('w', 'x', 'a') and self._didModify
        if needs_end_record:
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        # Release the file even when writing the ending records failed.
        handle, self.fp = self.fp, None
        self._fpclose(handle)
1821
def _write_end_record(self):
    """Write the central directory and the end-of-archive records.

    One central directory entry is written per member of self.filelist,
    followed by the ZIP64 end records when a count, offset or size
    exceeds the classic limits, the end-of-archive record and the
    archive comment.  Raises LargeZipFile if ZIP64 records are needed
    but not allowed.  The file is flushed at the end.
    """
    for zinfo in self.filelist:         # write central directory
        stamp = zinfo.date_time
        dosdate = (stamp[0] - 1980) << 9 | stamp[1] << 5 | stamp[2]
        dostime = stamp[3] << 11 | stamp[4] << 5 | (stamp[5] // 2)

        # Values too large for their 32-bit fields go into a ZIP64 extra
        # field, and the field itself is set to 0xffffffff.
        zip64_values = []
        if (zinfo.file_size > ZIP64_LIMIT
                or zinfo.compress_size > ZIP64_LIMIT):
            zip64_values.append(zinfo.file_size)
            zip64_values.append(zinfo.compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
        else:
            file_size = zinfo.file_size
            compress_size = zinfo.compress_size

        if zinfo.header_offset > ZIP64_LIMIT:
            zip64_values.append(zinfo.header_offset)
            header_offset = 0xffffffff
        else:
            header_offset = zinfo.header_offset

        extra_data = zinfo.extra
        min_version = 0
        if zip64_values:
            # Replace any existing ZIP64 field (id 1) by a fresh one
            # placed in front of the other extra fields.
            extra_data = _strip_extra(extra_data, (1,))
            count = len(zip64_values)
            extra_data = struct.pack(
                '<HH' + 'Q'*count,
                1, 8*count, *zip64_values) + extra_data

            min_version = ZIP64_VERSION

        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version,
                                  zinfo.reserved, flag_bits,
                                  zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data),
                                  len(zinfo.comment), 0,
                                  zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
        except DeprecationWarning:
            # Dump the values that were being packed, then re-raise.
            print((structCentralDir, stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset), file=sys.stderr)
            raise
        for piece in (centdir, filename, extra_data, zinfo.comment):
            self.fp.write(piece)

    pos2 = self.fp.tell()
    # Write end-of-zip-archive record
    centDirCount = len(self.filelist)
    centDirSize = pos2 - self.start_dir
    centDirOffset = self.start_dir
    if centDirCount > ZIP_FILECOUNT_LIMIT:
        requires_zip64 = "Files count"
    elif centDirOffset > ZIP64_LIMIT:
        requires_zip64 = "Central directory offset"
    elif centDirSize > ZIP64_LIMIT:
        requires_zip64 = "Central directory size"
    else:
        requires_zip64 = None
    if requires_zip64:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
        zip64endrec = struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, centDirCount, centDirCount,
            centDirSize, centDirOffset)
        self.fp.write(zip64endrec)

        zip64locrec = struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, pos2, 1)
        self.fp.write(zip64locrec)
        # The classic record only gets values capped to its field widths.
        centDirCount = min(centDirCount, 0xFFFF)
        centDirSize = min(centDirSize, 0xFFFFFFFF)
        centDirOffset = min(centDirOffset, 0xFFFFFFFF)

    endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, centDirCount, centDirCount,
                         centDirSize, centDirOffset, len(self._comment))
    self.fp.write(endrec)
    self.fp.write(self._comment)
    self.fp.flush()
1923
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001924 def _fpclose(self, fp):
1925 assert self._fileRefCnt > 0
1926 self._fileRefCnt -= 1
1927 if not self._fileRefCnt and not self._filePassed:
1928 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001929
1930
1931class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001932 """Class to create ZIP archives with Python library files and packages."""
1933
def __init__(self, file, mode="r", compression=ZIP_STORED,
             allowZip64=True, optimize=-1):
    """Open the archive like ZipFile does and remember 'optimize'.

    'file', 'mode', 'compression' and 'allowZip64' are handed to
    ZipFile.__init__; 'optimize' is stored as self._optimize.
    """
    base_options = dict(mode=mode, compression=compression,
                        allowZip64=allowZip64)
    ZipFile.__init__(self, file, **base_options)
    self._optimize = optimize
1939
Christian Tismer59202e52013-10-21 03:59:23 +02001940 def writepy(self, pathname, basename="", filterfunc=None):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001941 """Add all files from "pathname" to the ZIP archive.
1942
Fred Drake484d7352000-10-02 21:14:52 +00001943 If pathname is a package directory, search the directory and
1944 all package subdirectories recursively for all *.py and enter
1945 the modules into the archive. If pathname is a plain
1946 directory, listdir *.py and enter all modules. Else, pathname
1947 must be a Python *.py file and the module will be put into the
Brett Cannonf299abd2015-04-13 14:21:02 -04001948 archive. Added modules are always module.pyc.
Fred Drake484d7352000-10-02 21:14:52 +00001949 This method will compile the module.py into module.pyc if
1950 necessary.
Christian Tismer59202e52013-10-21 03:59:23 +02001951 If filterfunc(pathname) is given, it is called with every argument.
1952 When it is False, the file or directory is skipped.
Fred Drake484d7352000-10-02 21:14:52 +00001953 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001954 pathname = os.fspath(pathname)
Christian Tismer59202e52013-10-21 03:59:23 +02001955 if filterfunc and not filterfunc(pathname):
1956 if self.debug:
Christian Tismer410d9312013-10-22 04:09:28 +02001957 label = 'path' if os.path.isdir(pathname) else 'file'
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001958 print('%s %r skipped by filterfunc' % (label, pathname))
Christian Tismer59202e52013-10-21 03:59:23 +02001959 return
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001960 dir, name = os.path.split(pathname)
1961 if os.path.isdir(pathname):
1962 initname = os.path.join(pathname, "__init__.py")
1963 if os.path.isfile(initname):
1964 # This is a package directory, add it
1965 if basename:
1966 basename = "%s/%s" % (basename, name)
1967 else:
1968 basename = name
1969 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001970 print("Adding package in", pathname, "as", basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001971 fname, arcname = self._get_codename(initname[0:-3], basename)
1972 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001973 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001974 self.write(fname, arcname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01001975 dirlist = sorted(os.listdir(pathname))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001976 dirlist.remove("__init__.py")
1977 # Add all *.py files and package subdirectories
1978 for filename in dirlist:
1979 path = os.path.join(pathname, filename)
1980 root, ext = os.path.splitext(filename)
1981 if os.path.isdir(path):
1982 if os.path.isfile(os.path.join(path, "__init__.py")):
1983 # This is a package directory, add it
Christian Tismer59202e52013-10-21 03:59:23 +02001984 self.writepy(path, basename,
1985 filterfunc=filterfunc) # Recursive call
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001986 elif ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02001987 if filterfunc and not filterfunc(path):
1988 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001989 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02001990 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001991 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02001992 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001993 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001994 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001995 self.write(fname, arcname)
1996 else:
1997 # This is NOT a package directory, add its files at top level
1998 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001999 print("Adding files from directory", pathname)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01002000 for filename in sorted(os.listdir(pathname)):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002001 path = os.path.join(pathname, filename)
2002 root, ext = os.path.splitext(filename)
2003 if ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02002004 if filterfunc and not filterfunc(path):
2005 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03002006 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02002007 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002008 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02002009 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002010 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002011 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002012 self.write(fname, arcname)
2013 else:
2014 if pathname[-3:] != ".py":
Collin Winterce36ad82007-08-30 01:19:48 +00002015 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02002016 'Files added with writepy() must end with ".py"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002017 fname, arcname = self._get_codename(pathname[0:-3], basename)
2018 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002019 print("Adding file", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002020 self.write(fname, arcname)
2021
2022 def _get_codename(self, pathname, basename):
2023 """Return (filename, archivename) for the path.
2024
Fred Drake484d7352000-10-02 21:14:52 +00002025 Given a module name path, return the correct file path and
2026 archive name, compiling if necessary. For example, given
2027 /python/lib/string, return (/python/lib/string.pyc, string).
2028 """
Georg Brandl8334fd92010-12-04 10:26:46 +00002029 def _compile(file, optimize=-1):
2030 import py_compile
2031 if self.debug:
2032 print("Compiling", file)
2033 try:
2034 py_compile.compile(file, doraise=True, optimize=optimize)
Serhiy Storchaka45c43752013-01-29 20:10:28 +02002035 except py_compile.PyCompileError as err:
Georg Brandl8334fd92010-12-04 10:26:46 +00002036 print(err.msg)
2037 return False
2038 return True
2039
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002040 file_py = pathname + ".py"
2041 file_pyc = pathname + ".pyc"
Brett Cannonf299abd2015-04-13 14:21:02 -04002042 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2043 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2044 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
Georg Brandl8334fd92010-12-04 10:26:46 +00002045 if self._optimize == -1:
2046 # legacy mode: use whatever file is present
Brett Cannonf299abd2015-04-13 14:21:02 -04002047 if (os.path.isfile(file_pyc) and
Georg Brandl8334fd92010-12-04 10:26:46 +00002048 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2049 # Use .pyc file.
2050 arcname = fname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002051 elif (os.path.isfile(pycache_opt0) and
2052 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
Georg Brandl8334fd92010-12-04 10:26:46 +00002053 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2054 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002055 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002056 arcname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002057 elif (os.path.isfile(pycache_opt1) and
2058 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2059 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002060 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002061 fname = pycache_opt1
2062 arcname = file_pyc
2063 elif (os.path.isfile(pycache_opt2) and
2064 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2065 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2066 # file name in the archive.
2067 fname = pycache_opt2
2068 arcname = file_pyc
Barry Warsaw28a691b2010-04-17 00:19:56 +00002069 else:
Georg Brandl8334fd92010-12-04 10:26:46 +00002070 # Compile py into PEP 3147 pyc file.
2071 if _compile(file_py):
Brett Cannonf299abd2015-04-13 14:21:02 -04002072 if sys.flags.optimize == 0:
2073 fname = pycache_opt0
2074 elif sys.flags.optimize == 1:
2075 fname = pycache_opt1
2076 else:
2077 fname = pycache_opt2
2078 arcname = file_pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002079 else:
2080 fname = arcname = file_py
2081 else:
2082 # new mode: use given optimization level
2083 if self._optimize == 0:
Brett Cannonf299abd2015-04-13 14:21:02 -04002084 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002085 arcname = file_pyc
2086 else:
Brett Cannonf299abd2015-04-13 14:21:02 -04002087 arcname = file_pyc
2088 if self._optimize == 1:
2089 fname = pycache_opt1
2090 elif self._optimize == 2:
2091 fname = pycache_opt2
2092 else:
2093 msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2094 raise ValueError(msg)
Georg Brandl8334fd92010-12-04 10:26:46 +00002095 if not (os.path.isfile(fname) and
2096 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2097 if not _compile(file_py, optimize=self._optimize):
2098 fname = arcname = file_py
Barry Warsaw28a691b2010-04-17 00:19:56 +00002099 archivename = os.path.split(arcname)[1]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002100 if basename:
2101 archivename = "%s/%s" % (basename, archivename)
2102 return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002103
2104
def main(args=None):
    """Run the zipfile command-line interface.

    *args* is the list of command-line arguments to parse; it is handed
    to argparse as-is (the None default is argparse's own default).
    Exactly one of -l/--list, -e/--extract, -c/--create or -t/--test
    must be given.
    """
    import argparse

    cli = argparse.ArgumentParser(
        description='A simple command-line interface for zipfile module.')
    modes = cli.add_mutually_exclusive_group(required=True)
    modes.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    modes.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    modes.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    modes.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    options = cli.parse_args(args)

    if options.test is not None:
        with ZipFile(options.test, 'r') as archive:
            corrupted = archive.testzip()
        if corrupted:
            print("The following enclosed file is corrupted: {!r}".format(corrupted))
        print("Done testing")
        return

    if options.list is not None:
        with ZipFile(options.list, 'r') as archive:
            archive.printdir()
        return

    if options.extract is not None:
        source, target_dir = options.extract
        with ZipFile(source, 'r') as archive:
            archive.extractall(target_dir)
        return

    if options.create is None:
        return

    # First value is the archive to create, the rest are its sources.
    archive_name, *sources = options.create

    def add_tree(archive, fs_path, arc_path):
        # Store a regular file deflated; walk a directory depth-first in
        # sorted order; silently ignore anything else.
        if os.path.isfile(fs_path):
            archive.write(fs_path, arc_path, ZIP_DEFLATED)
            return
        if not os.path.isdir(fs_path):
            return
        if arc_path:
            # Record the directory entry itself unless it maps to the
            # archive root.
            archive.write(fs_path, arc_path)
        for entry in sorted(os.listdir(fs_path)):
            add_tree(archive,
                     os.path.join(fs_path, entry),
                     os.path.join(arc_path, entry))

    with ZipFile(archive_name, 'w') as archive:
        for source in sources:
            arc_root = os.path.basename(source)
            if not arc_root:
                # Path ended with a separator: use the directory's name.
                arc_root = os.path.basename(os.path.dirname(source))
            if arc_root in ('', os.curdir, os.pardir):
                arc_root = ''
            add_tree(archive, source, arc_root)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002164
# Run the command-line interface when this module is executed as a script.
if __name__ == "__main__":
    main()