blob: e83600e13d5efdff4a64c8be2febccabc38964a6 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Brett Cannonb57a0852013-06-15 17:32:30 -04008import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import sys
10import time
11import stat
12import shutil
13import struct
14import binascii
Antoine Pitroua6a4dc82017-09-07 18:56:24 +020015import threading
Guido van Rossum32abe6f2000-03-31 17:30:02 +000016
17try:
Tim Peterse1190062001-01-15 03:34:38 +000018 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000019 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040020except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000021 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000022 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000023
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020024try:
25 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040026except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020027 bz2 = None
28
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020029try:
30 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040031except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020032 lzma = None
33
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020034__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020035 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Georg Brandl4d540882010-10-28 06:42:33 +000036 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000037
class BadZipFile(Exception):
    """Raised when an archive is malformed or cannot be interpreted as ZIP."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +000040
41
class LargeZipFile(Exception):
    """Signal that an archive being written needs ZIP64, but ZIP64 is off.

    Raised while writing a zipfile when the data requires the ZIP64
    extensions and those extensions are disabled.
    """
47
Georg Brandl4d540882010-10-28 06:42:33 +000048error = BadZipfile = BadZipFile # Pre-3.2 compatibility names
49
Guido van Rossum32abe6f2000-03-31 17:30:02 +000050
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +000051ZIP64_LIMIT = (1 << 31) - 1
Serhiy Storchakacfbb3942014-09-23 21:34:24 +030052ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +000053ZIP_MAX_COMMENT = (1 << 16) - 1
Thomas Wouters0e3f5912006-08-11 14:57:12 +000054
Guido van Rossum32abe6f2000-03-31 17:30:02 +000055# constants for Zip file compression methods
56ZIP_STORED = 0
57ZIP_DEFLATED = 8
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020058ZIP_BZIP2 = 12
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020059ZIP_LZMA = 14
Guido van Rossum32abe6f2000-03-31 17:30:02 +000060# Other ZIP compression methods not supported
61
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020062DEFAULT_VERSION = 20
63ZIP64_VERSION = 45
64BZIP2_VERSION = 46
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020065LZMA_VERSION = 63
Martin v. Löwisd099b562012-05-01 14:08:22 +020066# we recognize (but not necessarily support) all features up to that version
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020067MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020068
Martin v. Löwisb09b8442008-07-03 14:13:42 +000069# Below are some formats and associated data for reading/writing headers using
70# the struct module. The names and structures of headers/records are those used
71# in the PKWARE description of the ZIP file format:
72# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
73# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000074
Martin v. Löwisb09b8442008-07-03 14:13:42 +000075# The "end of central directory" structure, magic number, size, and indices
76# (section V.I in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +000077structEndArchive = b"<4s4H2LH"
78stringEndArchive = b"PK\005\006"
79sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +000080
81_ECD_SIGNATURE = 0
82_ECD_DISK_NUMBER = 1
83_ECD_DISK_START = 2
84_ECD_ENTRIES_THIS_DISK = 3
85_ECD_ENTRIES_TOTAL = 4
86_ECD_SIZE = 5
87_ECD_OFFSET = 6
88_ECD_COMMENT_SIZE = 7
89# These last two indices are not part of the structure as defined in the
90# spec, but they are used internally by this module as a convenience
91_ECD_COMMENT = 8
92_ECD_LOCATION = 9
93
94# The "central directory" structure, magic number, size, and indices
95# of entries in the structure (section V.F in the format document)
96structCentralDir = "<4s4B4HL2L5H2L"
Georg Brandl2ee470f2008-07-16 12:55:28 +000097stringCentralDir = b"PK\001\002"
Martin v. Löwisb09b8442008-07-03 14:13:42 +000098sizeCentralDir = struct.calcsize(structCentralDir)
99
Fred Drake3e038e52001-02-28 17:56:26 +0000100# indexes of entries in the central directory structure
101_CD_SIGNATURE = 0
102_CD_CREATE_VERSION = 1
103_CD_CREATE_SYSTEM = 2
104_CD_EXTRACT_VERSION = 3
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000105_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +0000106_CD_FLAG_BITS = 5
107_CD_COMPRESS_TYPE = 6
108_CD_TIME = 7
109_CD_DATE = 8
110_CD_CRC = 9
111_CD_COMPRESSED_SIZE = 10
112_CD_UNCOMPRESSED_SIZE = 11
113_CD_FILENAME_LENGTH = 12
114_CD_EXTRA_FIELD_LENGTH = 13
115_CD_COMMENT_LENGTH = 14
116_CD_DISK_NUMBER_START = 15
117_CD_INTERNAL_FILE_ATTRIBUTES = 16
118_CD_EXTERNAL_FILE_ATTRIBUTES = 17
119_CD_LOCAL_HEADER_OFFSET = 18
120
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000121# The "local file header" structure, magic number, size, and indices
122# (section V.A in the format document)
123structFileHeader = "<4s2B4HL2L2H"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000124stringFileHeader = b"PK\003\004"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000125sizeFileHeader = struct.calcsize(structFileHeader)
126
Fred Drake3e038e52001-02-28 17:56:26 +0000127_FH_SIGNATURE = 0
128_FH_EXTRACT_VERSION = 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000129_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000130_FH_GENERAL_PURPOSE_FLAG_BITS = 3
131_FH_COMPRESSION_METHOD = 4
132_FH_LAST_MOD_TIME = 5
133_FH_LAST_MOD_DATE = 6
134_FH_CRC = 7
135_FH_COMPRESSED_SIZE = 8
136_FH_UNCOMPRESSED_SIZE = 9
137_FH_FILENAME_LENGTH = 10
138_FH_EXTRA_FIELD_LENGTH = 11
139
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000140# The "Zip64 end of central directory locator" structure, magic number, and size
Georg Brandl2ee470f2008-07-16 12:55:28 +0000141structEndArchive64Locator = "<4sLQL"
142stringEndArchive64Locator = b"PK\x06\x07"
143sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000144
145# The "Zip64 end of central directory" record, magic number, size, and indices
146# (section V.G in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000147structEndArchive64 = "<4sQ2H2L4Q"
148stringEndArchive64 = b"PK\x06\x06"
149sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000150
151_CD64_SIGNATURE = 0
152_CD64_DIRECTORY_RECSIZE = 1
153_CD64_CREATE_VERSION = 2
154_CD64_EXTRACT_VERSION = 3
155_CD64_DISK_NUMBER = 4
156_CD64_DISK_NUMBER_START = 5
157_CD64_NUMBER_ENTRIES_THIS_DISK = 6
158_CD64_NUMBER_ENTRIES_TOTAL = 7
159_CD64_DIRECTORY_SIZE = 8
160_CD64_OFFSET_START_CENTDIR = 9
161
Miss Islington (bot)44989bc2018-09-22 11:03:04 -0700162_DD_SIGNATURE = 0x08074b50
163
Miss Islington (bot)efdf3162018-09-17 06:08:45 -0700164_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
165
166def _strip_extra(extra, xids):
167 # Remove Extra Fields with specified IDs.
168 unpack = _EXTRA_FIELD_STRUCT.unpack
169 modified = False
170 buffer = []
171 start = i = 0
172 while i + 4 <= len(extra):
173 xid, xlen = unpack(extra[i : i + 4])
174 j = i + 4 + xlen
175 if xid in xids:
176 if i != start:
177 buffer.append(extra[start : i])
178 start = j
179 modified = True
180 i = j
181 if not modified:
182 return extra
183 return b''.join(buffer)
184
def _check_zipfile(fp):
    """Return True if the open file *fp* has an end-of-central-directory record.

    An OSError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        # A truthy end record means the magic number was found.
        found = bool(_EndRecData(fp))
    except OSError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000192
def is_zipfile(filename):
    """Quickly decide whether *filename* is a ZIP file via its magic number.

    *filename* may be a path, or an already open file or file-like object
    (anything with a ``read`` attribute).  Returns False when the file
    cannot be opened or read.
    """
    try:
        if hasattr(filename, "read"):
            return _check_zipfile(fp=filename)
        with open(filename, "rb") as fp:
            return _check_zipfile(fp)
    except OSError:
        return False
208
def _EndRecData64(fpin, offset, endrec):
    """Overlay ZIP64 end-of-archive information onto *endrec*.

    fpin   -- seekable binary file object.
    offset -- position of the classic end record, relative to end of file.
    endrec -- list produced from the classic end record; updated in place.

    Returns *endrec*, unchanged when no ZIP64 locator/record is found.
    Raises BadZipFile for archives that span multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # The file is too small to hold a ZIP64 locator in front of the
        # end record, so the record we were given is all there is.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, _reloff, disks = struct.unpack(structEndArchive64Locator,
                                                  locator)
    if magic != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks == 1):
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume there is no 'zip64 extensible data' between record and locator.
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    fields = struct.unpack(structEndArchive64, record)
    if fields[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Replace the classic values with their ZIP64 counterparts.
    endrec[_ECD_SIGNATURE] = fields[_CD64_SIGNATURE]
    endrec[_ECD_DISK_NUMBER] = fields[_CD64_DISK_NUMBER]
    endrec[_ECD_DISK_START] = fields[_CD64_DISK_NUMBER_START]
    endrec[_ECD_ENTRIES_THIS_DISK] = fields[_CD64_NUMBER_ENTRIES_THIS_DISK]
    endrec[_ECD_ENTRIES_TOTAL] = fields[_CD64_NUMBER_ENTRIES_TOTAL]
    endrec[_ECD_SIZE] = fields[_CD64_DIRECTORY_SIZE]
    endrec[_ECD_OFFSET] = fields[_CD64_OFFSET_START_CENTDIR]
    return endrec
250
251
def _EndRecData(fpin):
    """Locate and parse the "End of Central Directory" record, or return None.

    On success the result is a list: the unpacked fields of the record,
    then the archive comment (bytes), then the file offset at which the
    record starts.  ZIP64 data, when present, is merged in by
    _EndRecData64().
    """
    # Determine the file size.
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: an archive without a comment ends with the record itself.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    no_comment = (len(tail) == sizeEndCentDir
                  and tail[0:4] == stringEndArchive
                  and tail[-2:] == b"\000\000")
    if no_comment:
        # Signature matches and the comment length is zero: unpack, then
        # add a blank comment and the record's start offset.
        endrec = list(struct.unpack(structEndArchive, tail))
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Slow path: either not a ZIP file, or a ZIP file with a comment.  The
    # comment is the last item and may be up to 64K long, so scan that
    # much of the tail for the signature.  It is assumed the signature
    # does not occur inside the comment itself.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    window = fpin.read()
    start = window.rfind(stringEndArchive)
    if start < 0:
        # No end-of-central-directory signature anywhere in the tail.
        return None

    rec_end = start + sizeEndCentDir
    recData = window[start:rec_end]
    if len(recData) != sizeEndCentDir:
        # Signature found but the record is truncated: corrupt file.
        return None
    endrec = list(struct.unpack(structEndArchive, recData))
    commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    endrec.append(window[rec_end:rec_end + commentSize])
    endrec.append(maxCommentStart + start)

    return _EndRecData64(fpin, maxCommentStart + start - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000311
Fred Drake484d7352000-10-02 21:14:52 +0000312
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        '_compresslevel',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry description with default header values.

        filename  -- name of the member inside the archive.
        date_time -- (year, month, day, hour, min, sec) tuple.

        Raises ValueError if the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self._compresslevel = None      # Level for the compressor
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Return a compact description listing only non-default details."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The high 16 bits carry the Unix mode, the low 16 bits the rest.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        zip64 -- True to always emit the ZIP64 extra field, False to forbid
                 it, None to decide from the sizes (ZIP64 is used when
                 either size exceeds ZIP64_LIMIT).

        Raises LargeZipFile when the sizes need ZIP64 but zip64 is false.
        As a side effect, extract_version and create_version are raised to
        the minimum version required by the features in use.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        ASCII names are encoded as-is; any other name is encoded as UTF-8
        and flag bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Parse self.extra and apply the ZIP64 record (ID 0x0001), if any.

        For each of file_size, compress_size and header_offset that holds
        its "see ZIP64 record" sentinel, the next 64-bit value of the
        record is substituted, in that order.

        Raises BadZipFile when a record overruns the extra data, when the
        ZIP64 record has an unexpected size, or when it holds fewer values
        than the sentinels require.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if ln+4 > len(extra):
                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
            if tp == 0x0001:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        self.file_size = counts[idx]
                        idx += 1

                    if self.compress_size == 0xFFFFFFFF:
                        self.compress_size = counts[idx]
                        idx += 1

                    if self.header_offset == 0xffffffff:
                        self.header_offset = counts[idx]
                        idx += 1
                except IndexError:
                    # The record is shorter than the header sentinels
                    # demand; report it like any other corrupt extra field
                    # instead of leaking a bare IndexError.
                    raise BadZipFile(
                        "Corrupt extra field %04x (size=%d)" % (tp, ln)
                    ) from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        # endswith() rather than [-1] so an empty name is "not a directory"
        # instead of an IndexError.
        return self.filename.endswith('/')
531
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000532
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300533# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
534# internal keys. We noticed that a direct implementation is faster than
535# relying on binascii.crc32().
Thomas Wouterscf297e42007-02-23 15:07:44 +0000536
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300537_crctable = None
538def _gen_crc(crc):
539 for j in range(8):
540 if crc & 1:
541 crc = (crc >> 1) ^ 0xEDB88320
542 else:
543 crc >>= 1
544 return crc
Thomas Wouterscf297e42007-02-23 15:07:44 +0000545
Serhiy Storchaka06e52252017-03-30 19:09:08 +0300546# ZIP supports a password-based form of encryption. Even though known
547# plaintext attacks have been found against it, it is still useful
548# to be able to get data out of such a file.
549#
550# Usage:
551# zd = _ZipDecrypter(mypwd)
552# plain_bytes = zd(cypher_bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000553
def _ZipDecrypter(pwd):
    """Return a function that decrypts bytes encrypted with password *pwd*.

    *pwd* is iterated as integer byte values to initialise the three
    cipher keys.  The returned callable takes a bytes object and returns
    the decrypted bytes; it keeps updating the key state between calls.
    """
    global _crctable
    if _crctable is None:
        # Build the shared lookup table on first use.
        _crctable = [_gen_crc(n) for n in range(256)]
    table = _crctable

    k0, k1, k2 = 305419896, 591751049, 878082192

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ table[(crc ^ ch) & 0xFF]

    def update_keys(c):
        nonlocal k0, k1, k2
        k0 = crc32(c, k0)
        # Both steps are reductions modulo 2**32, so a single final mask
        # yields the same value as masking after each step.
        k1 = ((k1 + (k0 & 0xFF)) * 134775813 + 1) & 0xFFFFFFFF
        k2 = crc32(k1 >> 24, k2)

    for byte in pwd:
        update_keys(byte)

    def decrypter(data):
        """Decrypt a bytes object."""
        plain = bytearray()
        for byte in data:
            k = k2 | 2
            byte ^= ((k * (k ^ 1)) >> 8) & 0xFF
            update_keys(byte)
            plain.append(byte)
        return bytes(plain)

    return decrypter
Thomas Wouterscf297e42007-02-23 15:07:44 +0000590
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200591
class LZMACompressor:
    """Raw LZMA1 compressor that prefixes its output with a small header.

    The header (written once, before the first compressed bytes) is the
    bytes 9 and 4, the 16-bit little-endian length of the filter
    properties, and the filter properties themselves.
    """

    def __init__(self):
        # The real compressor is created lazily so the header can be
        # emitted together with the first output.
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        lzma1_filter = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[lzma1_filter])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*, returning whatever output is available."""
        header = b'' if self._comp is not None else self._init()
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream and return the remaining output."""
        header = b'' if self._comp is not None else self._init()
        return header + self._comp.flush()
613
614
class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by a properties header.

    Input is buffered until the 4-byte header and the filter properties
    it announces (plus at least one more byte) have arrived; only then is
    the underlying decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Feed *data* and return the bytes decompressed so far."""
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            # Bytes 2-3 hold the length of the filter properties.
            psize, = struct.unpack('<H', pending[2:4])
            body_start = 4 + psize
            if len(pending) <= body_start:
                return b''

            lzma1_filter = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[lzma1_filter])
            data = pending[body_start:]
            # The buffer is no longer needed once the header is parsed.
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
641
642
643compressor_names = {
644 0: 'store',
645 1: 'shrink',
646 2: 'reduce',
647 3: 'reduce',
648 4: 'reduce',
649 5: 'reduce',
650 6: 'implode',
651 7: 'tokenize',
652 8: 'deflate',
653 9: 'deflate64',
654 10: 'implode',
655 12: 'bzip2',
656 14: 'lzma',
657 18: 'terse',
658 19: 'lz77',
659 97: 'wavpack',
660 98: 'ppmd',
661}
662
def _check_compression(compression):
    """Validate that compression method *compression* can be used.

    Raises RuntimeError when the method is known but its module is not
    available, and NotImplementedError for any other unknown method.
    """
    # (method, backing module or True when none is needed, error message)
    requirements = (
        (ZIP_STORED, True, None),
        (ZIP_DEFLATED, zlib,
         "Compression requires the (missing) zlib module"),
        (ZIP_BZIP2, bz2,
         "Compression requires the (missing) bz2 module"),
        (ZIP_LZMA, lzma,
         "Compression requires the (missing) lzma module"),
    )
    for method, module, message in requirements:
        if compression == method:
            if not module:
                raise RuntimeError(message)
            return
    raise NotImplementedError("That compression method is not supported")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200680
681
def _get_compressor(compress_type, compresslevel=None):
    """Return a new compressor object for *compress_type*.

    *compresslevel* is forwarded to the deflate and bzip2 compressors when
    it is not None.  Returns None for any compression type that is not
    deflate, bzip2 or LZMA.
    """
    if compress_type == ZIP_DEFLATED:
        if compresslevel is None:
            level = zlib.Z_DEFAULT_COMPRESSION
        else:
            level = compresslevel
        # Negative window bits: raw deflate stream, no zlib header.
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        if compresslevel is None:
            return bz2.BZ2Compressor()
        return bz2.BZ2Compressor(compresslevel)
    if compress_type == ZIP_LZMA:
        # compresslevel is ignored for ZIP_LZMA
        return LZMACompressor()
    return None
696
697
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    Returns None for stored (uncompressed) data.  Raises
    NotImplementedError for any type other than stored, deflate, bzip2 or
    LZMA; the message includes the method's name when it is listed in
    compressor_names.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # Negative window bits: raw deflate stream, no zlib header.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    method_name = compressor_names.get(compress_type)
    if not method_name:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError("compression type %d (%s)" % (compress_type, method_name))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200713
714
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200715class _SharedFile:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300716 def __init__(self, file, pos, close, lock, writing):
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200717 self._file = file
718 self._pos = pos
719 self._close = close
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200720 self._lock = lock
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300721 self._writing = writing
John Jolly066df4f2018-01-30 01:51:35 -0700722 self.seekable = file.seekable
723 self.tell = file.tell
724
725 def seek(self, offset, whence=0):
726 with self._lock:
Miss Islington (bot)ad4f64d2018-07-29 12:57:21 -0700727 if self._writing():
John Jolly066df4f2018-01-30 01:51:35 -0700728 raise ValueError("Can't reposition in the ZIP file while "
729 "there is an open writing handle on it. "
730 "Close the writing handle before trying to read.")
Miss Islington (bot)ad4f64d2018-07-29 12:57:21 -0700731 self._file.seek(offset, whence)
John Jolly066df4f2018-01-30 01:51:35 -0700732 self._pos = self._file.tell()
733 return self._pos
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200734
735 def read(self, n=-1):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200736 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300737 if self._writing():
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300738 raise ValueError("Can't read from the ZIP file while there "
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300739 "is an open writing handle on it. "
740 "Close the writing handle before trying to read.")
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200741 self._file.seek(self._pos)
742 data = self._file.read(n)
743 self._pos = self._file.tell()
744 return data
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200745
746 def close(self):
747 if self._file is not None:
748 fileobj = self._file
749 self._file = None
750 self._close(fileobj)
751
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200752# Provide the tell method for unseekable stream
753class _Tellable:
754 def __init__(self, fp):
755 self.fp = fp
756 self.offset = 0
757
758 def write(self, data):
759 n = self.fp.write(data)
760 self.offset += n
761 return n
762
763 def tell(self):
764 return self.offset
765
766 def flush(self):
767 self.fp.flush()
768
769 def close(self):
770 self.fp.close()
771
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200772
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().

    Decompressed data that has been produced but not yet handed to the
    caller lives in ``_readbuffer``; ``_offset`` is the index of the first
    unread byte in that buffer.
    """

    # Max size supported by decompressor.
    # Parenthesised on purpose: "1 << 31 - 1" would parse as 1 << 30.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Set up reading of the member described by *zipinfo*.

        fileobj       -- file object positioned at the member's data
        mode          -- stored as the ``mode`` attribute
        zipinfo       -- supplies compress_type, compress_size, file_size,
                         filename and (optionally) CRC
        decrypter     -- optional callable applied to every block read
        close_fileobj -- if true, close() also closes *fileobj*
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        # Seeking needs seekable()/tell() on the file object; remember the
        # starting state so a backward seek can rewind to it.
        # Note: without a CRC on zipinfo, _running_crc is unset, so the
        # lookup below raises AttributeError and the member stays
        # non-seekable.
        self._seekable = False
        try:
            if fileobj.seekable():
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Push the bytes just read back in front of the buffer so the
            # logical position is unchanged.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Entirely satisfied from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        # Drain the buffer, then keep reading until n more bytes arrive.
        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than asked for: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n=-1):
        """Read up to n bytes with at most one read() system call.

        If n is omitted, None, or negative, return the buffered data plus
        the first non-empty chunk obtained from the underlying stream.
        """

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            # Loop only to skip empty chunks; stop at the first real data.
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read (and decrypt, if needed) one block of raw member data.
        # Raises EOFError if the file ends before the member's data does.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        return self._seekable

    def seek(self, offset, whence=0):
        """Move to a new position in the decompressed data and return it.

        The target is clamped to the range [0, uncompressed size].  Moving
        forward reads and discards data; moving backward beyond the
        current buffer restarts decompression from the beginning of the
        member.  Raises io.UnsupportedOperation if the underlying stream
        is not seekable and ValueError for an invalid *whence*.
        """
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            # NOTE(review): self._decrypter is not re-initialised here; if
            # the decrypter keeps state between calls, rewinding an
            # encrypted member would continue with stale state -- confirm.
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos

        # Skip forward by reading and discarding, in bounded chunks.
        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        """Return the current position in the decompressed data."""
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        # Bytes decompressed so far, minus those still waiting in the buffer.
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos
1069
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001070
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001071class _ZipWriteFile(io.BufferedIOBase):
1072 def __init__(self, zf, zinfo, zip64):
1073 self._zinfo = zinfo
1074 self._zip64 = zip64
1075 self._zipfile = zf
Bo Baylesce237c72018-01-29 23:54:07 -06001076 self._compressor = _get_compressor(zinfo.compress_type,
1077 zinfo._compresslevel)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001078 self._file_size = 0
1079 self._compress_size = 0
1080 self._crc = 0
1081
1082 @property
1083 def _fileobj(self):
1084 return self._zipfile.fp
1085
1086 def writable(self):
1087 return True
1088
1089 def write(self, data):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001090 if self.closed:
1091 raise ValueError('I/O operation on closed file.')
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001092 nbytes = len(data)
1093 self._file_size += nbytes
1094 self._crc = crc32(data, self._crc)
1095 if self._compressor:
1096 data = self._compressor.compress(data)
1097 self._compress_size += len(data)
1098 self._fileobj.write(data)
1099 return nbytes
1100
1101 def close(self):
Serhiy Storchaka4c0d9ea2017-04-12 16:03:23 +03001102 if self.closed:
1103 return
Miss Islington (bot)4724ba92019-03-30 06:52:16 -07001104 try:
1105 super().close()
1106 # Flush any data from the compressor, and update header info
1107 if self._compressor:
1108 buf = self._compressor.flush()
1109 self._compress_size += len(buf)
1110 self._fileobj.write(buf)
1111 self._zinfo.compress_size = self._compress_size
1112 else:
1113 self._zinfo.compress_size = self._file_size
1114 self._zinfo.CRC = self._crc
1115 self._zinfo.file_size = self._file_size
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001116
Miss Islington (bot)4724ba92019-03-30 06:52:16 -07001117 # Write updated header info
1118 if self._zinfo.flag_bits & 0x08:
1119 # Write CRC and file sizes after the file data
1120 fmt = '<LLQQ' if self._zip64 else '<LLLL'
1121 self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
1122 self._zinfo.compress_size, self._zinfo.file_size))
1123 self._zipfile.start_dir = self._fileobj.tell()
1124 else:
1125 if not self._zip64:
1126 if self._file_size > ZIP64_LIMIT:
1127 raise RuntimeError(
1128 'File size unexpectedly exceeded ZIP64 limit')
1129 if self._compress_size > ZIP64_LIMIT:
1130 raise RuntimeError(
1131 'Compressed size unexpectedly exceeded ZIP64 limit')
1132 # Seek backwards and write file header (which will now include
1133 # correct CRC and file sizes)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001134
Miss Islington (bot)4724ba92019-03-30 06:52:16 -07001135 # Preserve current position in file
1136 self._zipfile.start_dir = self._fileobj.tell()
1137 self._fileobj.seek(self._zinfo.header_offset)
1138 self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1139 self._fileobj.seek(self._zipfile.start_dir)
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001140
Miss Islington (bot)4724ba92019-03-30 06:52:16 -07001141 # Successfully written: Add file to our caches
1142 self._zipfile.filelist.append(self._zinfo)
1143 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1144 finally:
1145 self._zipfile._writing = False
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001146
Miss Islington (bot)4724ba92019-03-30 06:52:16 -07001147
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001148
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001149class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001150 """ Class with methods to open, read, write, close, list zip files.
1151
Bo Baylesce237c72018-01-29 23:54:07 -06001152 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
1153 compresslevel=None)
Tim Petersa19a1682001-03-29 04:36:09 +00001154
Fred Drake3d9091e2001-03-26 15:49:24 +00001155 file: Either the path to the file, or a file-like object.
1156 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001157 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1158 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001159 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1160 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001161 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1162 needed, otherwise it will raise an exception when this would
1163 be necessary.
Bo Baylesce237c72018-01-29 23:54:07 -06001164 compresslevel: None (default for the given compression type) or an integer
1165 specifying the level to pass to the compressor.
1166 When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
1167 When using ZIP_DEFLATED integers 0 through 9 are accepted.
1168 When using ZIP_BZIP2 integers 1 through 9 are accepted.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001169
Fred Drake3d9091e2001-03-26 15:49:24 +00001170 """
Fred Drake484d7352000-10-02 21:14:52 +00001171
Fred Drake90eac282001-02-28 05:29:34 +00001172 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001173 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001174
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
             compresslevel=None):
    """Open a ZIP archive for reading ('r'), writing ('w'), exclusive
    creation ('x') or appending ('a').

    *file* may be a path (str or os.PathLike), in which case the file is
    opened here, or an already open file-like object.  Raises ValueError
    for any other mode.  If setting up the archive fails, the file object
    is released through _fpclose() before the exception propagates.
    """
    if mode not in ('r', 'w', 'x', 'a'):
        raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                  # Level of printing: 0 through 3
    self.NameToInfo = {}            # Find file info given name
    self.filelist = []              # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.compresslevel = compresslevel
    self.mode = mode
    self.pwd = None
    self._comment = b''

    # Work out whether we were given a path or a file-like object.
    if isinstance(file, os.PathLike):
        file = os.fspath(file)
    if not isinstance(file, str):
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    else:
        self._filePassed = 0
        self.filename = file
        # First the io.open() mode for each ZipFile mode, then the mode
        # to fall back to when opening with a given mode fails.
        fallback_modes = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                          'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
        open_mode = fallback_modes[mode]
        while True:
            try:
                self.fp = io.open(file, open_mode)
            except OSError:
                if open_mode not in fallback_modes:
                    raise
                open_mode = fallback_modes[open_mode]
            else:
                break
    self._fileRefCnt = 1
    self._lock = threading.RLock()
    self._seekable = True
    self._writing = False

    try:
        if mode == 'r':
            self._RealGetContents()
        elif mode in ('w', 'x'):
            # Mark as modified so the central directory gets written
            # even if no files are added to the archive.
            self._didModify = True
            try:
                self.start_dir = self.fp.tell()
            except (AttributeError, OSError):
                # No usable tell(): count written bytes ourselves.
                self.fp = _Tellable(self.fp)
                self.start_dir = 0
                self._seekable = False
            else:
                # Some file-like objects can provide tell() but not seek()
                try:
                    self.fp.seek(self.start_dir)
                except (AttributeError, OSError):
                    self._seekable = False
        elif mode == 'a':
            try:
                # Existing archive: position at the start of its central
                # directory, which will be overwritten.
                self._RealGetContents()
                self.fp.seek(self.start_dir)
            except BadZipFile:
                # Not a zip file: append the archive at the end.
                self.fp.seek(0, 2)

                # Mark as modified so the central directory gets written
                # even if no files are added to the archive.
                self._didModify = True
                self.start_dir = self.fp.tell()
        else:
            raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
    except BaseException:
        # Release the file object on any failure, then re-raise.
        stale_fp = self.fp
        self.fp = None
        self._fpclose(stale_fp)
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001263
Ezio Melottifaa6b7f2009-12-30 12:34:59 +00001264 def __enter__(self):
1265 return self
1266
1267 def __exit__(self, type, value, traceback):
1268 self.close()
1269
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001270 def __repr__(self):
1271 result = ['<%s.%s' % (self.__class__.__module__,
1272 self.__class__.__qualname__)]
1273 if self.fp is not None:
1274 if self._filePassed:
1275 result.append(' file=%r' % self.fp)
1276 elif self.filename is not None:
1277 result.append(' filename=%r' % self.filename)
1278 result.append(' mode=%r' % self.mode)
1279 else:
1280 result.append(' [closed]')
1281 result.append('>')
1282 return ''.join(result)
1283
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record, reads the central
    directory and fills self.filelist / self.NameToInfo with one ZipInfo
    per entry.  Also sets self._comment and self.start_dir.

    Raises BadZipFile if the end record cannot be found, the central
    directory offset is invalid, or a directory entry is truncated or has
    a bad signature.  Raises NotImplementedError if an entry requires a
    newer ZIP version than this module supports.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    if self.start_dir < 0:
        # Inconsistent sizes/offsets in the end record: report it as a
        # bad archive rather than letting seek() fail on a negative value.
        raise BadZipFile("Bad offset for central directory")
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory from here on.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if len(centdir) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                        t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        # Shift by the amount of data preceding the archive, if any.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001360
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001361
def namelist(self):
    """Return the file names of all archive members as a new list.

    The names are taken from the 'filename' attribute of every entry in
    self.filelist, in that list's order.
    """
    names = []
    for entry in self.filelist:
        names.append(entry.filename)
    return names
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001365
def infolist(self):
    """Return the list of ZipInfo instances describing the archive members.

    The list object stored in self.filelist is handed back directly; no
    copy is made.
    """
    entries = self.filelist
    return entries
1370
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    A header row is printed first, followed by one row per entry of
    self.filelist showing the entry's filename, the first six items of
    its date_time and its file_size.  Every row goes through print()
    with 'file' passed along unchanged.
    """
    heading = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(heading, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001379
1380 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001381 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001382 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001383 for zinfo in self.filelist:
1384 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001385 # Read by chunks, to avoid an OverflowError or a
1386 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001387 with self.open(zinfo.filename, "r") as f:
1388 while f.read(chunk_size): # Check CRC-32
1389 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001390 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001391 return zinfo.filename
1392
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    The lookup goes through self.NameToInfo.  Raises KeyError when no
    entry (or a None entry) is stored for that name.
    """
    entry = self.NameToInfo.get(name)
    if entry is not None:
        return entry
    raise KeyError('There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001401
def setpassword(self, pwd):
    """Set the default password used for encrypted files.

    Any false value (None, b'', ...) clears the default by storing None
    in self.pwd.  A true value must be a bytes instance, otherwise
    TypeError is raised and self.pwd is left untouched.
    """
    if not pwd:
        self.pwd = None
        return
    if not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd
Thomas Wouterscf297e42007-02-23 15:07:44 +00001410
@property
def comment(self):
    """The comment text associated with the ZIP file (read from self._comment)."""
    current = self._comment
    return current

@comment.setter
def comment(self, comment):
    # Only bytes are accepted as archive comment.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
    # An over-long comment is not rejected: it is cut down to
    # ZIP_MAX_COMMENT bytes and the caller is warned about it.
    too_long = len(comment) > ZIP_MAX_COMMENT
    if too_long:
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT, stacklevel=2)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    # Flag the archive as modified so that close() rewrites the end
    # records for archives opened in a writing mode.
    self._didModify = True
1428
def read(self, name, pwd=None):
    """Return the complete contents of member 'name' as bytes.

    The member is opened for reading via self.open(name, "r", pwd),
    read in one go and closed again before returning.
    """
    with self.open(name, "r", pwd) as member:
        payload = member.read()
    return payload
Guido van Rossumd8faa362007-04-27 19:54:29 +00001433
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Return a file-like object for the archive member 'name'.

    name is either the member's file name inside the ZIP file (a string)
    or a ZipInfo object.

    mode is 'r' to read a member already present in the archive, or 'w'
    to write a member that is newly added to it.

    pwd is the password used to decrypt the member; it is only accepted
    when reading.  For an encrypted member without an explicit pwd the
    archive default (self.pwd) is used.

    force_zip64 is passed on to the writer.  Use it when the size is not
    known in advance but may exceed 2 GiB; if the size is known, it is
    best to pass a ZipInfo instance for name with file_size set.

    Raises ValueError for an unsupported mode, for a password combined
    with mode 'w', for a closed archive and for a read attempted while a
    writing handle is open; TypeError when pwd is not bytes; BadZipFile
    when the local file header is truncated, carries the wrong signature
    or names a different file than the directory entry;
    NotImplementedError for patched data or strong encryption;
    RuntimeError when the password is missing or wrong.
    """
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    writing = (mode == "w")
    if pwd:
        if not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if writing:
            raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    # Settle on the ZipInfo object that describes the member.
    if isinstance(name, ZipInfo):
        zinfo = name
    elif writing:
        # A new member: start from a fresh ZipInfo that inherits the
        # archive-wide compression settings.
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
    else:
        zinfo = self.getinfo(name)

    if writing:
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                         "is an open writing handle on it. "
                         "Close the writing handle before trying to read.")

    # Reading: every reader holds one reference on the underlying file,
    # which is given back through self._fpclose when the reader closes.
    self._fileRefCnt += 1
    shared = _SharedFile(self.fp, zinfo.header_offset,
                         self._fpclose, self._lock, lambda: self._writing)
    try:
        # Consume the local file header so that the stream ends up
        # positioned at the start of the member's data.
        raw_header = shared.read(sizeFileHeader)
        if len(raw_header) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fields = struct.unpack(structFileHeader, raw_header)
        if fields[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        raw_name = shared.read(fields[_FH_FILENAME_LENGTH])
        extra_len = fields[_FH_EXTRA_FIELD_LENGTH]
        if extra_len:
            # The extra field is skipped, its content is not needed here.
            shared.read(extra_len)

        flags = zinfo.flag_bits
        if flags & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")
        if flags & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        # Flag bit 11 marks a UTF-8 file name; otherwise the historical
        # cp437 encoding applies.
        name_codec = "utf-8" if flags & 0x800 else "cp437"
        if raw_name.decode(name_codec) != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, raw_name))

        # Flag bit 0 marks an encrypted member.
        decrypter = None
        if flags & 0x1:
            pwd = pwd or self.pwd
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)

            decrypter = _ZipDecrypter(pwd)
            # The cipher stream starts with a 12 byte encryption header.
            # Its first 11 bytes are random; the 12th holds the most
            # significant byte of either the CRC or the file time
            # (depending on the header type) and is what the password
            # is checked against.
            encrypted_header = shared.read(12)
            decrypted_header = decrypter(encrypted_header[0:12])
            if flags & 0x8:
                # extended local header: compare against the file time
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # otherwise compare against the CRC
                check_byte = (zinfo.CRC >> 24) & 0xff
            if decrypted_header[11] != check_byte:
                raise RuntimeError("Bad password for file %r" % name)

        return ZipExtFile(shared, mode, zinfo, decrypter, True)
    except BaseException:
        # Whatever went wrong, give the file reference back before the
        # exception travels on.
        shared.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001547
def _open_to_write(self, zinfo, force_zip64=False):
    """Write the local header for 'zinfo' and return a writer for its data.

    The archive is switched into the "writing" state (self._writing),
    so that no second writer can be opened until this one is done.

    Raises ValueError when force_zip64 is requested on an archive opened
    without allowZip64, or when another writing handle is still open.
    """
    if force_zip64 and not self._allowZip64:
        raise ValueError(
            "force_zip64 is True, but allowZip64 was False when opening "
            "the ZIP file."
        )
    if self._writing:
        raise ValueError("Can't write to the ZIP file while there is "
                         "another write handle open on it. "
                         "Close the first handle before opening another.")

    # Sizes and CRC are placeholders here; they are overwritten with the
    # correct data after the file has been processed.  A file_size that
    # the caller supplied in advance is kept.
    if not hasattr(zinfo, 'file_size'):
        zinfo.file_size = 0
    zinfo.compress_size = 0
    zinfo.CRC = 0

    flags = 0x00
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        flags |= 0x02
    if not self._seekable:
        flags |= 0x08
    zinfo.flag_bits = flags

    if not zinfo.external_attr:
        zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

    # Compressed size can be larger than uncompressed size, hence the
    # 5% head-room on the size estimate.
    zip64 = self._allowZip64
    if zip64:
        zip64 = force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT

    # The new member starts where the central directory currently begins.
    if self._seekable:
        self.fp.seek(self.start_dir)
    zinfo.header_offset = self.fp.tell()

    self._writecheck(zinfo)
    self._didModify = True

    self.fp.write(zinfo.FileHeader(zip64))

    self._writing = True
    return _ZipWriteFile(self, zinfo, zip64)
1590
def extract(self, member, path=None, pwd=None):
    """Extract one member from the archive and return the created path.

    'member' may be a file name or a ZipInfo object.  The member is
    extracted under its full name below 'path'; when 'path' is None the
    current working directory is used.  'pwd' is handed on for
    decrypting the member.
    """
    destination = os.getcwd() if path is None else os.fspath(path)
    return self._extract_member(member, destination, pwd)
1603
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members from the archive.

    'members' defaults to everything returned by namelist(); when given
    it must be a subset of that list.  The members are extracted below
    'path', or below the current working directory when 'path' is None.
    'pwd' is handed on for decrypting the members.
    """
    selected = self.namelist() if members is None else members
    destination = os.getcwd() if path is None else os.fspath(path)
    for member in selected:
        self._extract_member(member, destination, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001620
@classmethod
def _sanitize_windows_name(cls, arcname, pathsep):
    """Return 'arcname' with illegal characters replaced and parts cleaned.

    Each of the characters :<>|"?* becomes an underscore, trailing dots
    are removed from every 'pathsep'-separated part, and parts that end
    up empty are dropped.  The translation table is built on first use
    and cached on the class.
    """
    trans = cls._windows_illegal_name_trans_table
    if not trans:
        bad_chars = ':<>|"?*'
        trans = str.maketrans(bad_chars, '_' * len(bad_chars))
        cls._windows_illegal_name_trans_table = trans
    kept = []
    for part in arcname.translate(trans).split(pathsep):
        part = part.rstrip('.')
        if part:
            kept.append(part)
    return pathsep.join(kept)
1635
def _extract_member(self, member, targetpath, pwd):
    """Extract 'member' below the directory 'targetpath'.

    'member' is a ZipInfo object or a member name that is resolved with
    getinfo().  The member's archive name is made safe first: it is
    treated as relative, and a drive letter or UNC prefix, redundant
    separators and "." / ".." components are removed.  Missing parent
    directories are created.  'pwd' is used to decrypt the member.

    Returns the normalized path of the file or directory that was
    extracted.
    """
    if not isinstance(member, ZipInfo):
        member = self.getinfo(member)

    # Build the destination pathname, replacing forward slashes with
    # the platform specific separator.
    arcname = member.filename.replace('/', os.path.sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # Interpret an absolute pathname as relative; remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    invalid_path_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                               if x not in invalid_path_parts)
    if os.path.sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, os.path.sep)

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.  exist_ok guards
    # against another thread or process creating the same directories
    # between the existence check and the makedirs() call.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs, exist_ok=True)

    if member.is_dir():
        if not os.path.isdir(targetpath):
            try:
                os.mkdir(targetpath)
            except FileExistsError:
                # Losing a creation race is fine as long as what now
                # exists really is a directory.
                if not os.path.isdir(targetpath):
                    raise
        return targetpath

    # io.open is the builtin open(); spelled out so that it cannot be
    # confused with the archive's own open().
    with self.open(member, pwd=pwd) as source, \
         io.open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1677
def _writecheck(self, zinfo):
    """Check for errors before 'zinfo' is written to the archive.

    A name that is already present only triggers a warning.  ValueError
    is raised when the archive is not in mode 'w', 'x' or 'a' or has
    been closed.  The compression type is validated with
    _check_compression().  When ZIP64 is not allowed, LargeZipFile is
    raised if the entry count, the file size or the header offset would
    need the ZIP64 extensions.
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ('w', 'x', 'a'):
        raise ValueError("write() requires mode 'w', 'x', or 'a'")
    if not self.fp:
        raise ValueError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)

    if self._allowZip64:
        # No classic-format limit applies.
        return
    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        reason = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        reason = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        reason = "Zipfile size"
    else:
        return
    raise LargeZipFile(reason + " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001700
def write(self, filename, arcname=None,
          compress_type=None, compresslevel=None):
    """Put the file system entry 'filename' into the archive.

    The member's metadata is produced by
    ZipInfo.from_file(filename, arcname).  For a regular file the bytes
    are copied into the archive, using 'compress_type' and
    'compresslevel' when they are given and the archive-wide settings
    otherwise.  For a directory only a header is written.

    Raises ValueError when the archive has been closed or a writing
    handle is still open.
    """
    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname)

    if not zinfo.is_dir():
        # Regular file: explicit arguments win over the archive defaults.
        zinfo.compress_type = (compress_type if compress_type is not None
                               else self.compression)
        zinfo._compresslevel = (compresslevel if compresslevel is not None
                                else self.compresslevel)
        # io.open is the builtin open(); spelled out so that it cannot
        # be confused with the archive's own open().
        with io.open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
            shutil.copyfileobj(src, dest, 1024*8)
        return

    # Directory entry: there is no data, only a header.
    zinfo.compress_size = 0
    zinfo.CRC = 0
    with self._lock:
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        # The central directory will start right behind this header.
        self.start_dir = self.fp.tell()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001748
def writestr(self, zinfo_or_arcname, data,
             compress_type=None, compresslevel=None):
    """Write 'data' into the archive as a new member.

    'data' may be either a 'str' or a 'bytes' instance; a 'str' is
    encoded as UTF-8 first.  'zinfo_or_arcname' is either a ZipInfo
    instance or the name of the member in the archive.  For a plain name
    a ZipInfo is created with the current local time and the archive's
    compression settings; a name ending in '/' gets directory
    attributes.  'compress_type' and 'compresslevel', when given,
    override the values on the ZipInfo.

    Raises ValueError when the archive has been closed or a writing
    handle is still open.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo._compresslevel = self.compresslevel
        # endswith() rather than filename[-1]: indexing an empty name
        # would fail with an unrelated IndexError.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    if compresslevel is not None:
        zinfo._compresslevel = compresslevel

    zinfo.file_size = len(data)            # Uncompressed size
    with self._lock:
        with self.open(zinfo, mode='w') as dest:
            dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001789
def __del__(self):
    """Finalizer: invoke close() in case the user never did."""
    closer = self.close
    closer()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001793
def close(self):
    """Close the file; for mode 'w', 'x' and 'a' write the ending records.

    Calling close() on an archive that is already closed does nothing.
    The ending records are only written when the archive was modified.
    Raises ValueError while a writing handle is still open.
    """
    if self.fp is None:
        return

    if self._writing:
        raise ValueError("Can't close the ZIP file while there is "
                         "an open writing handle on it. "
                         "Close the writing handle before closing the zip.")

    try:
        writable = self.mode in ('w', 'x', 'a')
        if writable and self._didModify:
            # write ending records
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        # Detach the file object first, then release our reference to
        # it, even if writing the ending records failed.
        handle, self.fp = self.fp, None
        self._fpclose(handle)
1815
def _write_end_record(self):
    """Write the central directory and the end-of-archive records.

    One central directory record is written to self.fp for every entry
    of self.filelist, followed by the end-of-archive record and the
    archive comment.  When the entry count, the directory offset or the
    directory size exceeds the classic limits, the ZIP64 end-of-archive
    record and its locator are written first; LargeZipFile is raised
    instead if ZIP64 is not allowed for this archive.
    """
    for entry in self.filelist:  # write central directory
        stamp = entry.date_time
        dosdate = ((stamp[0] - 1980) << 9) | (stamp[1] << 5) | stamp[2]
        dostime = (stamp[3] << 11) | (stamp[4] << 5) | (stamp[5] // 2)

        # Values that do not fit the 32-bit header fields are collected
        # here and stored in a ZIP64 extra field; the header field then
        # holds the 0xffffffff marker.
        zip64_values = []
        if entry.file_size > ZIP64_LIMIT \
           or entry.compress_size > ZIP64_LIMIT:
            zip64_values.append(entry.file_size)
            zip64_values.append(entry.compress_size)
            file_size = compress_size = 0xffffffff
        else:
            file_size = entry.file_size
            compress_size = entry.compress_size

        header_offset = entry.header_offset
        if header_offset > ZIP64_LIMIT:
            zip64_values.append(header_offset)
            header_offset = 0xffffffff

        extra_data = entry.extra
        min_version = 0
        if zip64_values:
            # Put a fresh ZIP64 field (header id 1) in front of the
            # extras, after removing any id 1 field already in them.
            count = len(zip64_values)
            remaining = _strip_extra(extra_data, (1,))
            extra_data = struct.pack(
                '<HH' + 'Q'*count,
                1, 8*count, *zip64_values) + remaining

            min_version = ZIP64_VERSION

        method = entry.compress_type
        if method == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif method == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, entry.extract_version)
        create_version = max(min_version, entry.create_version)
        try:
            filename, flag_bits = entry._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  entry.create_system, extract_version,
                                  entry.reserved, flag_bits,
                                  entry.compress_type, dostime, dosdate,
                                  entry.CRC, compress_size, file_size,
                                  len(filename), len(extra_data),
                                  len(entry.comment),
                                  0, entry.internal_attr,
                                  entry.external_attr,
                                  header_offset)
        except DeprecationWarning:
            # Dump the values that were about to be packed, then let the
            # warning-turned-exception propagate.
            print((structCentralDir, stringCentralDir, create_version,
                   entry.create_system, extract_version, entry.reserved,
                   entry.flag_bits, entry.compress_type, dostime, dosdate,
                   entry.CRC, compress_size, file_size,
                   len(entry.filename), len(extra_data), len(entry.comment),
                   0, entry.internal_attr, entry.external_attr,
                   header_offset), file=sys.stderr)
            raise
        self.fp.write(centdir)
        self.fp.write(filename)
        self.fp.write(extra_data)
        self.fp.write(entry.comment)

    dir_end = self.fp.tell()
    # Write end-of-zip-archive record
    entry_count = len(self.filelist)
    dir_size = dir_end - self.start_dir
    dir_offset = self.start_dir
    if entry_count > ZIP_FILECOUNT_LIMIT:
        zip64_reason = "Files count"
    elif dir_offset > ZIP64_LIMIT:
        zip64_reason = "Central directory offset"
    elif dir_size > ZIP64_LIMIT:
        zip64_reason = "Central directory size"
    else:
        zip64_reason = None

    if zip64_reason:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(zip64_reason +
                               " would require ZIP64 extensions")
        zip64_end = struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, entry_count, entry_count,
            dir_size, dir_offset)
        self.fp.write(zip64_end)

        zip64_locator = struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, dir_end, 1)
        self.fp.write(zip64_locator)
        # The classic record below can only hold the clamped values.
        entry_count = min(entry_count, 0xFFFF)
        dir_size = min(dir_size, 0xFFFFFFFF)
        dir_offset = min(dir_offset, 0xFFFFFFFF)

    end_record = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, entry_count, entry_count,
                             dir_size, dir_offset, len(self._comment))
    self.fp.write(end_record)
    self.fp.write(self._comment)
    self.fp.flush()
1917
def _fpclose(self, fp):
    """Drop one reference on the underlying file and close it when unused.

    'fp' is closed only once the reference count has reached zero and
    only if the file object was not passed in (self._filePassed).
    """
    assert self._fileRefCnt > 0
    self._fileRefCnt -= 1
    if self._fileRefCnt or self._filePassed:
        # Still referenced, or not ours to close.
        return
    fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001923
1924
1925class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001926 """Class to create ZIP archives with Python library files and packages."""
1927
def __init__(self, file, mode="r", compression=ZIP_STORED,
             allowZip64=True, optimize=-1):
    # 'file', 'mode', 'compression' and 'allowZip64' are handed straight
    # to ZipFile.__init__; 'optimize' is only remembered on the instance
    # as self._optimize.
    base_options = dict(mode=mode, compression=compression,
                        allowZip64=allowZip64)
    ZipFile.__init__(self, file, **base_options)
    self._optimize = optimize
1933
Christian Tismer59202e52013-10-21 03:59:23 +02001934 def writepy(self, pathname, basename="", filterfunc=None):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001935 """Add all files from "pathname" to the ZIP archive.
1936
Fred Drake484d7352000-10-02 21:14:52 +00001937 If pathname is a package directory, search the directory and
1938 all package subdirectories recursively for all *.py and enter
1939 the modules into the archive. If pathname is a plain
1940 directory, listdir *.py and enter all modules. Else, pathname
1941 must be a Python *.py file and the module will be put into the
Brett Cannonf299abd2015-04-13 14:21:02 -04001942 archive. Added modules are always module.pyc.
Fred Drake484d7352000-10-02 21:14:52 +00001943 This method will compile the module.py into module.pyc if
1944 necessary.
Christian Tismer59202e52013-10-21 03:59:23 +02001945 If filterfunc(pathname) is given, it is called with every argument.
1946 When it is False, the file or directory is skipped.
Fred Drake484d7352000-10-02 21:14:52 +00001947 """
Serhiy Storchaka8606e952017-03-08 14:37:51 +02001948 pathname = os.fspath(pathname)
Christian Tismer59202e52013-10-21 03:59:23 +02001949 if filterfunc and not filterfunc(pathname):
1950 if self.debug:
Christian Tismer410d9312013-10-22 04:09:28 +02001951 label = 'path' if os.path.isdir(pathname) else 'file'
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001952 print('%s %r skipped by filterfunc' % (label, pathname))
Christian Tismer59202e52013-10-21 03:59:23 +02001953 return
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001954 dir, name = os.path.split(pathname)
1955 if os.path.isdir(pathname):
1956 initname = os.path.join(pathname, "__init__.py")
1957 if os.path.isfile(initname):
1958 # This is a package directory, add it
1959 if basename:
1960 basename = "%s/%s" % (basename, name)
1961 else:
1962 basename = name
1963 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001964 print("Adding package in", pathname, "as", basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001965 fname, arcname = self._get_codename(initname[0:-3], basename)
1966 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001967 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001968 self.write(fname, arcname)
Bernhard M. Wiedemann57750be2018-01-31 11:17:10 +01001969 dirlist = sorted(os.listdir(pathname))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001970 dirlist.remove("__init__.py")
1971 # Add all *.py files and package subdirectories
1972 for filename in dirlist:
1973 path = os.path.join(pathname, filename)
1974 root, ext = os.path.splitext(filename)
1975 if os.path.isdir(path):
1976 if os.path.isfile(os.path.join(path, "__init__.py")):
1977 # This is a package directory, add it
Christian Tismer59202e52013-10-21 03:59:23 +02001978 self.writepy(path, basename,
1979 filterfunc=filterfunc) # Recursive call
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001980 elif ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02001981 if filterfunc and not filterfunc(path):
1982 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03001983 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02001984 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001985 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02001986 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001987 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001988 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001989 self.write(fname, arcname)
1990 else:
1991 # This is NOT a package directory, add its files at top level
1992 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001993 print("Adding files from directory", pathname)
Bernhard M. Wiedemann57750be2018-01-31 11:17:10 +01001994 for filename in sorted(os.listdir(pathname)):
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001995 path = os.path.join(pathname, filename)
1996 root, ext = os.path.splitext(filename)
1997 if ext == ".py":
Christian Tismer410d9312013-10-22 04:09:28 +02001998 if filterfunc and not filterfunc(path):
1999 if self.debug:
Serhiy Storchakab0d497c2016-09-10 21:28:07 +03002000 print('file %r skipped by filterfunc' % path)
Christian Tismer410d9312013-10-22 04:09:28 +02002001 continue
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002002 fname, arcname = self._get_codename(path[0:-3],
Christian Tismer59202e52013-10-21 03:59:23 +02002003 basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002004 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002005 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002006 self.write(fname, arcname)
2007 else:
2008 if pathname[-3:] != ".py":
Collin Winterce36ad82007-08-30 01:19:48 +00002009 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02002010 'Files added with writepy() must end with ".py"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002011 fname, arcname = self._get_codename(pathname[0:-3], basename)
2012 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002013 print("Adding file", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002014 self.write(fname, arcname)
2015
2016 def _get_codename(self, pathname, basename):
2017 """Return (filename, archivename) for the path.
2018
Fred Drake484d7352000-10-02 21:14:52 +00002019 Given a module name path, return the correct file path and
2020 archive name, compiling if necessary. For example, given
2021 /python/lib/string, return (/python/lib/string.pyc, string).
2022 """
Georg Brandl8334fd92010-12-04 10:26:46 +00002023 def _compile(file, optimize=-1):
2024 import py_compile
2025 if self.debug:
2026 print("Compiling", file)
2027 try:
2028 py_compile.compile(file, doraise=True, optimize=optimize)
Serhiy Storchaka45c43752013-01-29 20:10:28 +02002029 except py_compile.PyCompileError as err:
Georg Brandl8334fd92010-12-04 10:26:46 +00002030 print(err.msg)
2031 return False
2032 return True
2033
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002034 file_py = pathname + ".py"
2035 file_pyc = pathname + ".pyc"
Brett Cannonf299abd2015-04-13 14:21:02 -04002036 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2037 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2038 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
Georg Brandl8334fd92010-12-04 10:26:46 +00002039 if self._optimize == -1:
2040 # legacy mode: use whatever file is present
Brett Cannonf299abd2015-04-13 14:21:02 -04002041 if (os.path.isfile(file_pyc) and
Georg Brandl8334fd92010-12-04 10:26:46 +00002042 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2043 # Use .pyc file.
2044 arcname = fname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002045 elif (os.path.isfile(pycache_opt0) and
2046 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
Georg Brandl8334fd92010-12-04 10:26:46 +00002047 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2048 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002049 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002050 arcname = file_pyc
Brett Cannonf299abd2015-04-13 14:21:02 -04002051 elif (os.path.isfile(pycache_opt1) and
2052 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2053 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002054 # file name in the archive.
Brett Cannonf299abd2015-04-13 14:21:02 -04002055 fname = pycache_opt1
2056 arcname = file_pyc
2057 elif (os.path.isfile(pycache_opt2) and
2058 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2059 # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2060 # file name in the archive.
2061 fname = pycache_opt2
2062 arcname = file_pyc
Barry Warsaw28a691b2010-04-17 00:19:56 +00002063 else:
Georg Brandl8334fd92010-12-04 10:26:46 +00002064 # Compile py into PEP 3147 pyc file.
2065 if _compile(file_py):
Brett Cannonf299abd2015-04-13 14:21:02 -04002066 if sys.flags.optimize == 0:
2067 fname = pycache_opt0
2068 elif sys.flags.optimize == 1:
2069 fname = pycache_opt1
2070 else:
2071 fname = pycache_opt2
2072 arcname = file_pyc
Georg Brandl8334fd92010-12-04 10:26:46 +00002073 else:
2074 fname = arcname = file_py
2075 else:
2076 # new mode: use given optimization level
2077 if self._optimize == 0:
Brett Cannonf299abd2015-04-13 14:21:02 -04002078 fname = pycache_opt0
Georg Brandl8334fd92010-12-04 10:26:46 +00002079 arcname = file_pyc
2080 else:
Brett Cannonf299abd2015-04-13 14:21:02 -04002081 arcname = file_pyc
2082 if self._optimize == 1:
2083 fname = pycache_opt1
2084 elif self._optimize == 2:
2085 fname = pycache_opt2
2086 else:
2087 msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2088 raise ValueError(msg)
Georg Brandl8334fd92010-12-04 10:26:46 +00002089 if not (os.path.isfile(fname) and
2090 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2091 if not _compile(file_py, optimize=self._optimize):
2092 fname = arcname = file_py
Barry Warsaw28a691b2010-04-17 00:19:56 +00002093 archivename = os.path.split(arcname)[1]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00002094 if basename:
2095 archivename = "%s/%s" % (basename, archivename)
2096 return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002097
2098
def main(args=None):
    """Run the zipfile command-line interface.

    args is the list of command-line arguments to parse; None lets
    argparse fall back to sys.argv.  Exactly one of the following modes
    must be given:

      -l <zipfile>                 print a listing of the archive
      -e <zipfile> <output_dir>    extract the archive into output_dir
      -c <name> [<file> ...]       create archive <name> from the sources
      -t <zipfile>                 check the archive members for corruption

    Invalid usage ends in argparse's usual SystemExit.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for zipfile module.')
    modes = parser.add_mutually_exclusive_group(required=True)
    modes.add_argument('-l', '--list', metavar='<zipfile>',
                       help='Show listing of a zipfile')
    modes.add_argument('-e', '--extract', nargs=2,
                       metavar=('<zipfile>', '<output_dir>'),
                       help='Extract zipfile into target dir')
    modes.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create zipfile from sources')
    modes.add_argument('-t', '--test', metavar='<zipfile>',
                       help='Test if a zipfile is valid')
    options = parser.parse_args(args)

    if options.test is not None:
        with ZipFile(options.test, 'r') as archive:
            corrupted = archive.testzip()
        if corrupted:
            print("The following enclosed file is corrupted: {!r}".format(corrupted))
        print("Done testing")
        return

    if options.list is not None:
        with ZipFile(options.list, 'r') as archive:
            archive.printdir()
        return

    if options.extract is not None:
        archive_path, target_dir = options.extract
        with ZipFile(archive_path, 'r') as archive:
            archive.extractall(target_dir)
        return

    if options.create is not None:
        # First value is the archive to create, the rest are its sources.
        zip_name, *sources = options.create

        def add_tree(archive, fs_path, arc_path):
            # Regular files are stored deflated; directories get their own
            # entry (unless they map to the archive root) and are walked in
            # sorted order.  Anything else is ignored.
            if os.path.isfile(fs_path):
                archive.write(fs_path, arc_path, ZIP_DEFLATED)
                return
            if not os.path.isdir(fs_path):
                return
            if arc_path:
                archive.write(fs_path, arc_path)
            for entry in sorted(os.listdir(fs_path)):
                add_tree(archive,
                         os.path.join(fs_path, entry),
                         os.path.join(arc_path, entry))

        with ZipFile(zip_name, 'w') as archive:
            for source in sources:
                # Archive each source under its last path component; a
                # trailing separator leaves that empty, so fall back to
                # the name of the containing directory.
                arc_root = (os.path.basename(source) or
                            os.path.basename(os.path.dirname(source)))
                # "." and ".." carry no name: put their contents at the
                # top level of the archive.
                if arc_root in (os.curdir, os.pardir):
                    arc_root = ''
                add_tree(archive, source, arc_root)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002158
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()