blob: 27a4c713e766016fd177234e24dc93c0db2cce87 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Brett Cannonb57a0852013-06-15 17:32:30 -04009import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +000010import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Serhiy Storchaka9e777732015-10-10 19:43:32 +030017try:
18 import threading
19except ImportError:
20 import dummy_threading as threading
Guido van Rossum32abe6f2000-03-31 17:30:02 +000021
22try:
Tim Peterse1190062001-01-15 03:34:38 +000023 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000024 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040025except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000026 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000027 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000028
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020029try:
30 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040031except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020032 bz2 = None
33
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020034try:
35 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040036except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020037 lzma = None
38
# Names exported by ``from zipfile import *``.
__all__ = [
    "BadZipFile",
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "ZIP_BZIP2",
    "ZIP_LZMA",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
    "LargeZipFile",
]
Skip Montanaro40fc1602001-03-01 04:27:19 +000042
class BadZipFile(Exception):
    """Error raised by this module for ZIP archives it cannot process.

    For example, it is raised for archives that span multiple disks.
    """
Thomas Wouters0e3f5912006-08-11 14:57:12 +000045
46
class LargeZipFile(Exception):
    """Raised while writing a zipfile that needs ZIP64 extensions.

    The error is reported when the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """
52
# Pre-3.2 compatibility names: both are aliases of BadZipFile.
BadZipfile = BadZipFile
error = BadZipFile
54
Guido van Rossum32abe6f2000-03-31 17:30:02 +000055
ZIP64_LIMIT = 0x7FFFFFFF            # (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 0xFFFF        # (1 << 16) - 1
ZIP_MAX_COMMENT = 0xFFFF            # (1 << 16) - 1

# constants for Zip file compression methods
# (other ZIP compression methods are not supported)
ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2, ZIP_LZMA = 0, 8, 12, 14

DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those
# used in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\x05\x06"
sizeEndCentDir = struct.calcsize(structEndArchive)

(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE) = range(8)
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\x01\x02"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\x03\x04"
sizeFileHeader = struct.calcsize(structFileHeader)

(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)

# The "Zip64 end of central directory locator" structure, magic number,
# and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
166
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An OSError raised while looking for the record is treated the same as
    "not found" and yields False.
    """
    try:
        found = bool(_EndRecData(fp))   # file has correct magic number
    except OSError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000174
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too: anything
    with a ``read`` attribute is inspected directly, everything else is
    opened in binary mode.  An OSError (e.g. the file cannot be opened)
    results in False.
    """
    try:
        if hasattr(filename, "read"):
            return _check_zipfile(fp=filename)
        with open(filename, "rb") as stream:
            return _check_zipfile(stream)
    except OSError:
        return False
190
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    *offset* is relative to the end of the file (it is used with whence=2)
    and marks where the classic end-of-central-directory record starts.
    *endrec* is returned unchanged whenever the ZIP64 locator or record
    cannot be read or does not carry the expected signature.

    Raises BadZipFile if the locator describes a multi-disk archive.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator,
                                               locator)
    if sig != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks == 1):
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data': the record sits directly before
    # the locator.
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    (sig, _recsize, _create_version, _read_version, disk_num, disk_dir,
     dircount, dircount2, dirsize, diroffset) = struct.unpack(
        structEndArchive64, record)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    replacements = (
        (_ECD_SIGNATURE, sig),
        (_ECD_DISK_NUMBER, disk_num),
        (_ECD_DISK_START, disk_dir),
        (_ECD_ENTRIES_THIS_DISK, dircount),
        (_ECD_ENTRIES_TOTAL, dircount2),
        (_ECD_SIZE, dirsize),
        (_ECD_OFFSET, diroffset),
    )
    for index, value in replacements:
        endrec[index] = value
    return endrec
232
233
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: a ZIP file with no archive comment has the "end of central
    # directory" structure as the very last item in the file.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    has_plain_record = (len(tail) == sizeEndCentDir and
                        tail[0:4] == stringEndArchive and
                        tail[-2:] == b"\000\000")
    if has_plain_record:
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, tail))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    window = fpin.read()
    start = window.rfind(stringEndArchive)
    if start < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    recData = window[start:start + sizeEndCentDir]
    if len(recData) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, recData))
    commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    comment_begin = start + sizeEndCentDir
    endrec.append(window[comment_begin:comment_begin + commentSize])
    endrec.append(maxCommentStart + start)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, maxCommentStart + start - filesize,
                         endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000293
Fred Drake484d7352000-10-02 21:14:52 +0000294
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the record for one archive member.

        filename is truncated at the first null character and has os.sep
        replaced by "/"; the untouched value is kept in orig_filename.
        date_time is a (year, month, day, hour, min, sec) sequence.

        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Return a compact description listing only non-default details."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The upper 16 bits are shown as a file mode, the lower 16 bits raw.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        zip64 selects whether a ZIP64 extra record is appended; when it is
        None the choice is made from the sizes.  As a side effect,
        extract_version and create_version are raised to the minimum
        version required by the features in use.

        Raises LargeZipFile if a size exceeds ZIP64_LIMIT while zip64 is
        false.
        """
        dt = self.date_time
        # Pack the timestamp into MS-DOS date/time words (2-second resolution).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        ASCII is used when possible; otherwise the name is encoded as UTF-8
        and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply ZIP64 values found in the extra field to this record.

        Walks the (type, length, payload) records in self.extra.  For a
        record of type 1, every size/offset attribute that currently holds
        a 0xffffffff placeholder is replaced by the next 64-bit value.

        Raises RuntimeError for a type-1 record of unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                # NOTE: counts[idx] raises IndexError when the record holds
                # fewer values than there are placeholders to fill.
                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        # endswith() rather than filename[-1]: an empty filename must give
        # False instead of raising IndexError (which also broke __repr__).
        return self.filename.endswith('/')
507
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000508
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = [0] * 256
        for index in range(256):
            value = index
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= poly
            table[index] = value
        return table
    # Built on first instantiation and then shared by every instance.
    crctable = None

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        shifted = (crc >> 8) & 0xffffff
        return shifted ^ self.crctable[(crc ^ ch) & 0xff]

    def __init__(self, pwd):
        """Initialise the three keys and mix in every byte of *pwd*."""
        if _ZipDecrypter.crctable is None:
            _ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable()
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for byte in pwd:
            self._UpdateKeys(byte)

    def _UpdateKeys(self, c):
        """Advance the key state with the plaintext byte *c*."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (self.key0 & 0xFF)) & mask32
        self.key1 = (key1 * 0x08088405 + 1) & mask32
        self.key2 = self._crc32((self.key1 >> 24) & 0xFF, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        keystream_byte = ((k * (k ^ 1)) >> 8) & 0xFF
        c ^= keystream_byte
        self._UpdateKeys(c)
        return c
568
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200569
class LZMACompressor:
    """Raw LZMA1 compressor that prefixes its output with a small header.

    The header (packed as '<BBH' with the values 9, 4 and the length of the
    filter properties, followed by the properties themselves) is emitted
    once, by the first call to compress() or flush().
    """

    def __init__(self):
        # The real compressor is created lazily, together with the header.
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*, preceded by the header on the first call."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream, preceded by the header if nothing was sent."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.flush()
591
592
class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by a properties header.

    Input is buffered until more than the 4-byte prefix plus the filter
    properties (whose length is the little-endian 16-bit value at offset 2)
    has arrived; only then is the real decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Return decompressed bytes, or b'' while the header is incomplete."""
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            psize, = struct.unpack('<H', pending[2:4])
            payload_start = 4 + psize
            if len(pending) <= payload_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:payload_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[payload_start:]
            # The header buffer is no longer needed once the stream is set up.
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
619
620
# Display names for ZIP compression method numbers (used in repr() output
# and in "unsupported compression" error messages).
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce', 3: 'reduce', 4: 'reduce', 5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
640
def _check_compression(compression):
    """Raise RuntimeError unless *compression* can be used.

    ZIP_STORED is always accepted.  The other known methods are accepted
    only when their supporting module was imported successfully; any other
    value is rejected as unsupported.
    """
    if compression == ZIP_STORED:
        return

    requirements = (
        (ZIP_DEFLATED, zlib,
         "Compression requires the (missing) zlib module"),
        (ZIP_BZIP2, bz2,
         "Compression requires the (missing) bz2 module"),
        (ZIP_LZMA, lzma,
         "Compression requires the (missing) lzma module"),
    )
    for method, module, message in requirements:
        if compression == method:
            if not module:
                raise RuntimeError(message)
            return

    raise RuntimeError("That compression method is not supported")
657 raise RuntimeError("That compression method is not supported")
658
659
def _get_compressor(compress_type):
    """Return a new compressor object for *compress_type*.

    None is returned for every method other than deflate, bzip2 and lzma.
    """
    if compress_type == ZIP_DEFLATED:
        # Negative wbits: raw deflate stream, no zlib header or trailer.
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    if compress_type == ZIP_LZMA:
        return LZMACompressor()
    return None
670
671
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    None is returned for ZIP_STORED.  Raises NotImplementedError for any
    method other than store, deflate, bzip2 and lzma.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # Negative wbits: raw deflate stream, no zlib header or trailer.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    # Unsupported method: include its name in the error when we know it.
    descr = compressor_names.get(compress_type)
    if descr:
        message = "compression type %d (%s)" % (compress_type, descr)
    else:
        message = "compression type %d" % (compress_type,)
    raise NotImplementedError(message)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200687
688
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200689class _SharedFile:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300690 def __init__(self, file, pos, close, lock, writing):
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200691 self._file = file
692 self._pos = pos
693 self._close = close
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200694 self._lock = lock
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300695 self._writing = writing
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200696
697 def read(self, n=-1):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200698 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300699 if self._writing():
700 raise RuntimeError("Can't read from the ZIP file while there "
701 "is an open writing handle on it. "
702 "Close the writing handle before trying to read.")
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200703 self._file.seek(self._pos)
704 data = self._file.read(n)
705 self._pos = self._file.tell()
706 return data
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200707
708 def close(self):
709 if self._file is not None:
710 fileobj = self._file
711 self._file = None
712 self._close(fileobj)
713
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200714# Provide the tell method for unseekable stream
715class _Tellable:
716 def __init__(self, fp):
717 self.fp = fp
718 self.offset = 0
719
720 def write(self, data):
721 n = self.fp.write(data)
722 self.offset += n
723 return n
724
725 def tell(self):
726 return self.offset
727
728 def flush(self):
729 self.fp.flush()
730
731 def close(self):
732 self.fp.close()
733
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200734
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.

    Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses are required:
    # "1 << 31 - 1" parses as "1 << (31 - 1)", i.e. 2**30.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Prepare to read the member described by *zipinfo* from *fileobj*.

        *decrypter*, when given, is called on each raw byte read.
        *close_fileobj* tells close() whether to also close *fileobj*.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        # Compressed bytes still to be read from the file.
        self._compress_left = zipinfo.compress_size
        # Uncompressed bytes still to be handed to the caller.
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # read() consumed the bytes; put them back in front of whatever
            # is left in the buffer so the position appears unchanged.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # The request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than asked for: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n=-1):
        """Read up to n bytes with at most one read() system call.

        If n is omitted, None, or negative, return whatever is buffered plus
        the result of the first underlying read that yields data.
        """

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never return more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read raw (still compressed) bytes from the file and decrypt them
        # when a decrypter is set.  Raises EOFError when the file yields no
        # data although compressed bytes are still expected.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = bytes(map(self._decrypter, data))
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001000
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001001
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001002class _ZipWriteFile(io.BufferedIOBase):
1003 def __init__(self, zf, zinfo, zip64):
1004 self._zinfo = zinfo
1005 self._zip64 = zip64
1006 self._zipfile = zf
1007 self._compressor = _get_compressor(zinfo.compress_type)
1008 self._file_size = 0
1009 self._compress_size = 0
1010 self._crc = 0
1011
1012 @property
1013 def _fileobj(self):
1014 return self._zipfile.fp
1015
1016 def writable(self):
1017 return True
1018
1019 def write(self, data):
1020 nbytes = len(data)
1021 self._file_size += nbytes
1022 self._crc = crc32(data, self._crc)
1023 if self._compressor:
1024 data = self._compressor.compress(data)
1025 self._compress_size += len(data)
1026 self._fileobj.write(data)
1027 return nbytes
1028
1029 def close(self):
1030 super().close()
1031 # Flush any data from the compressor, and update header info
1032 if self._compressor:
1033 buf = self._compressor.flush()
1034 self._compress_size += len(buf)
1035 self._fileobj.write(buf)
1036 self._zinfo.compress_size = self._compress_size
1037 else:
1038 self._zinfo.compress_size = self._file_size
1039 self._zinfo.CRC = self._crc
1040 self._zinfo.file_size = self._file_size
1041
1042 # Write updated header info
1043 if self._zinfo.flag_bits & 0x08:
1044 # Write CRC and file sizes after the file data
1045 fmt = '<LQQ' if self._zip64 else '<LLL'
1046 self._fileobj.write(struct.pack(fmt, self._zinfo.CRC,
1047 self._zinfo.compress_size, self._zinfo.file_size))
1048 self._zipfile.start_dir = self._fileobj.tell()
1049 else:
1050 if not self._zip64:
1051 if self._file_size > ZIP64_LIMIT:
1052 raise RuntimeError('File size unexpectedly exceeded ZIP64 '
1053 'limit')
1054 if self._compress_size > ZIP64_LIMIT:
1055 raise RuntimeError('Compressed size unexpectedly exceeded '
1056 'ZIP64 limit')
1057 # Seek backwards and write file header (which will now include
1058 # correct CRC and file sizes)
1059
1060 # Preserve current position in file
1061 self._zipfile.start_dir = self._fileobj.tell()
1062 self._fileobj.seek(self._zinfo.header_offset)
1063 self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1064 self._fileobj.seek(self._zipfile.start_dir)
1065
1066 self._zipfile._writing = False
1067
1068 # Successfully written: Add file to our caches
1069 self._zipfile.filelist.append(self._zinfo)
1070 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1071
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001072class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001073 """ Class with methods to open, read, write, close, list zip files.
1074
Serhiy Storchaka235c5e02013-11-23 15:55:38 +02001075 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +00001076
Fred Drake3d9091e2001-03-26 15:49:24 +00001077 file: Either the path to the file, or a file-like object.
1078 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001079 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1080 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001081 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1082 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001083 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1084 needed, otherwise it will raise an exception when this would
1085 be necessary.
1086
Fred Drake3d9091e2001-03-26 15:49:24 +00001087 """
Fred Drake484d7352000-10-02 21:14:52 +00001088
Fred Drake90eac282001-02-28 05:29:34 +00001089 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001090 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001091
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
    """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
    or append 'a'.

    *file* is either a path string, which is opened here, or a file-like
    object, which is used as given.  Raises RuntimeError for an unknown
    mode or an unusable compression method.
    """
    if mode not in ('r', 'w', 'x', 'a'):
        raise RuntimeError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = mode
    self.pwd = None
    self._comment = b''

    if not isinstance(file, str):
        # A file-like object was passed in; use it as given.
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    else:
        # A filename was passed in; open it ourselves.
        self._filePassed = 0
        self.filename = file
        # Maps the ZipFile mode to the first open() mode to try, and each
        # open() mode to the one to fall back on when opening fails.
        fallback_modes = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                          'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
        open_mode = fallback_modes[mode]
        while True:
            try:
                self.fp = io.open(file, open_mode)
            except OSError:
                if open_mode not in fallback_modes:
                    # No further fallback available.
                    raise
                open_mode = fallback_modes[open_mode]
            else:
                break
    self._fileRefCnt = 1
    self._lock = threading.RLock()
    self._seekable = True
    self._writing = False

    try:
        if mode == 'r':
            self._RealGetContents()
        elif mode in ('w', 'x'):
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
            try:
                self.start_dir = self.fp.tell()
            except (AttributeError, OSError):
                # No usable tell(): count written bytes ourselves.
                self.fp = _Tellable(self.fp)
                self.start_dir = 0
                self._seekable = False
            else:
                # Some file-like objects can provide tell() but not seek()
                try:
                    self.fp.seek(self.start_dir)
                except (AttributeError, OSError):
                    self._seekable = False
        elif mode == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                self.start_dir = self.fp.tell()
        else:
            raise RuntimeError("Mode must be 'r', 'w', 'x', or 'a'")
    except:
        # Release the file on any failure, then re-raise unchanged.
        failed_fp = self.fp
        self.fp = None
        self._fpclose(failed_fp)
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001176
Ezio Melottifaa6b7f2009-12-30 12:34:59 +00001177 def __enter__(self):
1178 return self
1179
1180 def __exit__(self, type, value, traceback):
1181 self.close()
1182
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001183 def __repr__(self):
1184 result = ['<%s.%s' % (self.__class__.__module__,
1185 self.__class__.__qualname__)]
1186 if self.fp is not None:
1187 if self._filePassed:
1188 result.append(' file=%r' % self.fp)
1189 elif self.filename is not None:
1190 result.append(' filename=%r' % self.filename)
1191 result.append(' mode=%r' % self.mode)
1192 else:
1193 result.append(' [closed]')
1194 result.append('>')
1195 return ''.join(result)
1196
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Fills self.filelist and self.NameToInfo with one ZipInfo per central
    directory entry, and sets self.start_dir and self._comment.

    Raises BadZipFile when no end-of-archive record is found or the central
    directory is truncated or has a bad signature, and NotImplementedError
    when an entry needs a newer extract version than MAX_EXTRACT_VERSION.
    """
    archive = self.fp
    try:
        endrec = _EndRecData(archive)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    archive.seek(self.start_dir, 0)
    # Parse the whole directory from an in-memory copy.
    directory = io.BytesIO(archive.read(size_cd))
    consumed = 0
    while consumed < size_cd:
        raw_entry = directory.read(sizeCentralDir)
        if len(raw_entry) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, raw_entry)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        raw_name = directory.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        # 0x800 marks the UTF-8 file names extension; otherwise use the
        # historical ZIP filename encoding.
        name_encoding = 'utf-8' if flags & 0x800 else 'cp437'
        filename = raw_name.decode(name_encoding)
        # Create ZipInfo instance to store file information
        entry = ZipInfo(filename)
        entry.extra = directory.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        entry.comment = directory.read(centdir[_CD_COMMENT_LENGTH])
        entry.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (entry.create_version, entry.create_system, entry.extract_version,
         entry.reserved, entry.flag_bits, entry.compress_type, t, d,
         entry.CRC, entry.compress_size, entry.file_size) = centdir[1:12]
        if entry.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (entry.extract_version / 10))
        entry.volume, entry.internal_attr, entry.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        entry._raw_time = t
        entry.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        entry._decodeExtra()
        # Applied after _decodeExtra(), as in the original statement order.
        entry.header_offset = entry.header_offset + concat
        self.filelist.append(entry)
        self.NameToInfo[entry.filename] = entry

        # update total bytes read from central directory
        consumed += (sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", consumed)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001273
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001274
def namelist(self):
    """Return a new list holding the filename of every archive member."""
    return [zinfo.filename for zinfo in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001278
def infolist(self):
    """Return the list of ZipInfo instances for the archive members.

    This is the archive's own list object, not a copy.
    """
    return self.filelist
1283
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header line is printed, then one line per member giving its name,
    modification time and size.  *file* is passed through to print().
    """
    print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
          file=file)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001292
1293 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001294 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001295 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001296 for zinfo in self.filelist:
1297 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001298 # Read by chunks, to avoid an OverflowError or a
1299 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001300 with self.open(zinfo.filename, "r") as f:
1301 while f.read(chunk_size): # Check CRC-32
1302 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001303 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001304 return zinfo.filename
1305
def getinfo(self, name):
    """Return the ZipInfo instance recorded for member *name*.

    Raises KeyError when the archive has no such member.
    """
    zinfo = self.NameToInfo.get(name)
    if zinfo is not None:
        return zinfo
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001314
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A false value (None or empty bytes) clears the default.  Raises
    TypeError when a non-empty *pwd* is not a bytes object.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001323
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Only bytes are accepted.  A comment longer than ZIP_MAX_COMMENT is
    # truncated with a warning rather than rejected.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment))
    # check for valid comment length
    too_long = len(comment) > ZIP_MAX_COMMENT
    if too_long:
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT, stacklevel=2)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    # Mark the archive as modified.
    self._didModify = True
1341
def read(self, name, pwd=None):
    """Return the complete contents of member *name* as bytes.

    *name* and *pwd* are passed to open() in mode "r"; the member file is
    closed again before returning.
    """
    with self.open(name, "r", pwd) as member:
        return member.read()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001346
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Return a file-like object for the member 'name'.

    name is either the member's file name inside the archive or a
    ZipInfo object.

    mode is 'r' to read an existing member or 'w' to write a new one;
    the deprecated 'U' and 'rU' modes are accepted with a
    DeprecationWarning.

    pwd is the bytes password used to decrypt the member.  It is only
    valid when reading; if omitted, the archive's default password
    (self.pwd) is used for encrypted members.

    force_zip64 is forwarded to the writing path: pass it when the
    size of the data is not known in advance but may exceed 2 GiB.
    When the size is known, prefer passing a ZipInfo with file_size
    set.
    """
    if mode not in {"r", "w", "U", "rU"}:
        raise RuntimeError('open() requires mode "r", "w", "U", or "rU"')
    if 'U' in mode:
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
    if pwd:
        if not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd))
        if mode == "w":
            raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise RuntimeError(
            "Attempt to use ZIP archive that was already closed")

    for_writing = (mode == 'w')

    # Resolve 'name' to a ZipInfo object.
    if isinstance(name, ZipInfo):
        zinfo = name
    elif for_writing:
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
    else:
        zinfo = self.getinfo(name)

    if for_writing:
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise RuntimeError("Can't read from the ZIP file while there "
                           "is an open writing handle on it. "
                           "Close the writing handle before trying to read.")

    # Reading: take a reference on the underlying file and position a
    # shared view at the member's local header.
    self._fileRefCnt += 1
    member_file = _SharedFile(self.fp, zinfo.header_offset,
                              self._fpclose, self._lock,
                              lambda: self._writing)
    try:
        # Consume and validate the fixed-size local file header.
        raw_header = member_file.read(sizeFileHeader)
        if len(raw_header) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        local_header = struct.unpack(structFileHeader, raw_header)
        if local_header[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        # Variable-length parts: keep the name, skip the extra field.
        fname = member_file.read(local_header[_FH_FILENAME_LENGTH])
        extra_length = local_header[_FH_EXTRA_FIELD_LENGTH]
        if extra_length:
            member_file.read(extra_length)

        flags = zinfo.flag_bits
        if flags & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")
        if flags & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        # Flag bit 11 marks a UTF-8 encoded name; otherwise cp437.
        fname_str = fname.decode("utf-8" if flags & 0x800 else "cp437")
        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # Flag bit 0 marks an encrypted member.
        decrypter = None
        if flags & 0x1:
            pwd = pwd or self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, password "
                                   "required for extraction" % name)

            decrypter = _ZipDecrypter(pwd)
            # The cypher stream starts with a 12-byte encryption
            # header.  Its last byte, once decrypted, must match a
            # check byte taken from the file time (when flag bit 3 is
            # set) or from the CRC; that is how the password is
            # verified.
            encrypted_header = member_file.read(12)
            plain_header = [decrypter(b) for b in encrypted_header[0:12]]
            if flags & 0x8:
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                check_byte = (zinfo.CRC >> 24) & 0xff
            if plain_header[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(member_file, mode, zinfo, decrypter, True)
    except BaseException:
        # Whatever went wrong, release the shared file before
        # propagating the error.
        member_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001463
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001464 def _open_to_write(self, zinfo, force_zip64=False):
1465 if force_zip64 and not self._allowZip64:
1466 raise ValueError(
1467 "force_zip64 is True, but allowZip64 was False when opening "
1468 "the ZIP file."
1469 )
1470 if self._writing:
1471 raise RuntimeError("Can't write to the ZIP file while there is "
1472 "another write handle open on it. "
1473 "Close the first handle before opening another.")
1474
1475 # Sizes and CRC are overwritten with correct data after processing the file
1476 if not hasattr(zinfo, 'file_size'):
1477 zinfo.file_size = 0
1478 zinfo.compress_size = 0
1479 zinfo.CRC = 0
1480
1481 zinfo.flag_bits = 0x00
1482 if zinfo.compress_type == ZIP_LZMA:
1483 # Compressed data includes an end-of-stream (EOS) marker
1484 zinfo.flag_bits |= 0x02
1485 if not self._seekable:
1486 zinfo.flag_bits |= 0x08
1487
1488 if not zinfo.external_attr:
1489 zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
1490
1491 # Compressed size can be larger than uncompressed size
1492 zip64 = self._allowZip64 and \
1493 (force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1494
1495 if self._seekable:
1496 self.fp.seek(self.start_dir)
1497 zinfo.header_offset = self.fp.tell()
1498
1499 self._writecheck(zinfo)
1500 self._didModify = True
1501
1502 self.fp.write(zinfo.FileHeader(zip64))
1503
1504 self._writing = True
1505 return _ZipWriteFile(self, zinfo, zip64)
1506
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the path it was written to.

    `member' may be a file name or a ZipInfo object; a name is looked
    up with getinfo().  The member is extracted below `path', which
    defaults to the current working directory.  `pwd' is passed on
    for opening the member.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
1520
def extractall(self, path=None, members=None, pwd=None):
    """Extract every requested member by calling extract() on each.

    `members' defaults to the list returned by namelist(); when given
    it should be a subset of that list.  `path' and `pwd' are handed
    to extract() unchanged.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
1532
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001533 @classmethod
1534 def _sanitize_windows_name(cls, arcname, pathsep):
1535 """Replace bad characters and remove trailing dots from parts."""
1536 table = cls._windows_illegal_name_trans_table
1537 if not table:
1538 illegal = ':<>|"?*'
1539 table = str.maketrans(illegal, '_' * len(illegal))
1540 cls._windows_illegal_name_trans_table = table
1541 arcname = arcname.translate(table)
1542 # remove trailing dots
1543 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1544 # rejoin, removing empty parts.
1545 arcname = pathsep.join(x for x in arcname if x)
1546 return arcname
1547
Christian Heimes790c8232008-01-07 21:14:23 +00001548 def _extract_member(self, member, targetpath, pwd):
1549 """Extract the ZipInfo object 'member' to a physical
1550 file on the path targetpath.
1551 """
1552 # build the destination pathname, replacing
1553 # forward slashes to platform specific separators.
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001554 arcname = member.filename.replace('/', os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001555
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001556 if os.path.altsep:
1557 arcname = arcname.replace(os.path.altsep, os.path.sep)
1558 # interpret absolute pathname as relative, remove drive letter or
1559 # UNC path, redundant separators, "." and ".." components.
1560 arcname = os.path.splitdrive(arcname)[1]
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001561 invalid_path_parts = ('', os.path.curdir, os.path.pardir)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001562 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001563 if x not in invalid_path_parts)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001564 if os.path.sep == '\\':
Serhiy Storchakae5e64442013-02-02 19:50:59 +02001565 # filter illegal characters on Windows
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001566 arcname = self._sanitize_windows_name(arcname, os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001567
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001568 targetpath = os.path.join(targetpath, arcname)
Christian Heimes790c8232008-01-07 21:14:23 +00001569 targetpath = os.path.normpath(targetpath)
1570
1571 # Create all upper directories if necessary.
1572 upperdirs = os.path.dirname(targetpath)
1573 if upperdirs and not os.path.exists(upperdirs):
1574 os.makedirs(upperdirs)
1575
Serhiy Storchaka503f9082016-02-08 00:02:25 +02001576 if member.is_dir():
Martin v. Löwis70ccd162009-05-24 19:47:22 +00001577 if not os.path.isdir(targetpath):
1578 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001579 return targetpath
1580
Antoine Pitrou17babc52012-11-17 23:50:08 +01001581 with self.open(member, pwd=pwd) as source, \
1582 open(targetpath, "wb") as target:
1583 shutil.copyfileobj(source, target)
Christian Heimes790c8232008-01-07 21:14:23 +00001584
1585 return targetpath
1586
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001587 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001588 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001589 if zinfo.filename in self.NameToInfo:
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001590 import warnings
1591 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001592 if self.mode not in ('w', 'x', 'a'):
1593 raise RuntimeError("write() requires mode 'w', 'x', or 'a'")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001594 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001595 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02001596 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001597 _check_compression(zinfo.compress_type)
Serhiy Storchakacfbb3942014-09-23 21:34:24 +03001598 if not self._allowZip64:
1599 requires_zip64 = None
1600 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1601 requires_zip64 = "Files count"
1602 elif zinfo.file_size > ZIP64_LIMIT:
1603 requires_zip64 = "Filesize"
1604 elif zinfo.header_offset > ZIP64_LIMIT:
1605 requires_zip64 = "Zipfile size"
1606 if requires_zip64:
1607 raise LargeZipFile(requires_zip64 +
1608 " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001609
def write(self, filename, arcname=None, compress_type=None):
    """Add the file or directory *filename* to the archive.

    The member is stored under *arcname* (handed to
    ZipInfo.from_file together with *filename*).  For regular files,
    *compress_type* overrides the archive's default compression.
    Raises RuntimeError if the archive is closed or a writing handle
    is still open.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise RuntimeError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname)

    if not zinfo.is_dir():
        # Regular file: stream its bytes through a writing handle.
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type
        with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
            shutil.copyfileobj(src, dest, 1024*8)
        return

    # Directory: there is no data, only a local header to write.
    zinfo.compress_size = 0
    zinfo.CRC = 0
    with self._lock:
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        self.start_dir = self.fp.tell()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001651
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write 'data' into the archive as a single member.

    'data' may be a 'str' or a 'bytes' instance; a 'str' is encoded
    as UTF-8 first.  'zinfo_or_arcname' is either a ZipInfo instance
    or the name to give the member in the archive.  'compress_type',
    when given, overrides the member's compression method.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")

    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        # Build a fresh entry stamped with the current local time.
        now = time.localtime(time.time())[:6]
        zinfo = ZipInfo(filename=zinfo_or_arcname, date_time=now)
        zinfo.compress_type = self.compression
        if zinfo.filename[-1] == '/':
            # drwxrwxr-x plus the MS-DOS directory flag
            zinfo.external_attr = (0o40775 << 16) | 0x10
        else:
            zinfo.external_attr = 0o600 << 16  # ?rw-------

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise RuntimeError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    zinfo.file_size = len(data)  # Uncompressed size
    with self._lock, self.open(zinfo, mode='w') as dest:
        dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001687
1688 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001689 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001690 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001691
def close(self):
    """Close the archive; in modes 'w', 'x' and 'a' first write the
    ending records if the archive was modified.

    Does nothing when the archive is already closed.  Raises
    RuntimeError while a writing handle is still open.
    """
    if self.fp is None:
        return

    if self._writing:
        raise RuntimeError("Can't close the ZIP file while there is "
                           "an open writing handle on it. "
                           "Close the writing handle before closing the zip.")

    try:
        # write ending records
        if self.mode in ('w', 'x', 'a') and self._didModify:
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        # Release the file even if writing the ending records failed.
        handle, self.fp = self.fp, None
        self._fpclose(handle)
1713
def _write_end_record(self):
    """Write the central directory and the end-of-archive records.

    One central directory entry is written per member of
    self.filelist, followed (when needed and allowed) by the ZIP64
    end-of-archive record and locator, then the regular
    end-of-archive record and the archive comment.  Raises
    LargeZipFile if ZIP64 records are needed but not allowed.
    """
    for zinfo in self.filelist:  # write central directory
        dt = zinfo.date_time
        dosdate = ((dt[0] - 1980) << 9) | (dt[1] << 5) | dt[2]
        dostime = (dt[3] << 11) | (dt[4] << 5) | (dt[5] // 2)

        # Values that do not fit the 32-bit fields go into a ZIP64
        # extra field; the fixed fields then hold 0xffffffff.
        zip64_values = []
        if (zinfo.file_size > ZIP64_LIMIT
                or zinfo.compress_size > ZIP64_LIMIT):
            zip64_values.append(zinfo.file_size)
            zip64_values.append(zinfo.compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
        else:
            file_size = zinfo.file_size
            compress_size = zinfo.compress_size

        if zinfo.header_offset > ZIP64_LIMIT:
            zip64_values.append(zinfo.header_offset)
            header_offset = 0xffffffff
        else:
            header_offset = zinfo.header_offset

        extra_data = zinfo.extra
        min_version = 0
        if zip64_values:
            # Prepend a ZIP64 field to the member's extra data
            zip64_field = struct.pack(
                '<HH' + 'Q'*len(zip64_values),
                1, 8*len(zip64_values), *zip64_values)
            extra_data = zip64_field + extra_data

            min_version = ZIP64_VERSION

        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version,
                                  zinfo.reserved, flag_bits,
                                  zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data),
                                  len(zinfo.comment), 0,
                                  zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
        except DeprecationWarning:
            # Dump the values being packed to stderr, then re-raise.
            print((structCentralDir, stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset), file=sys.stderr)
            raise
        for chunk in (centdir, filename, extra_data, zinfo.comment):
            self.fp.write(chunk)

    dir_end = self.fp.tell()
    # Write end-of-zip-archive record
    entry_count = len(self.filelist)
    dir_size = dir_end - self.start_dir
    dir_offset = self.start_dir

    requires_zip64 = None
    if entry_count > ZIP_FILECOUNT_LIMIT:
        requires_zip64 = "Files count"
    elif dir_offset > ZIP64_LIMIT:
        requires_zip64 = "Central directory offset"
    elif dir_size > ZIP64_LIMIT:
        requires_zip64 = "Central directory size"

    if requires_zip64:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
        self.fp.write(struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, entry_count, entry_count,
            dir_size, dir_offset))
        self.fp.write(struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, dir_end, 1))

        # Clamp the values for the regular record to its field widths.
        entry_count = min(entry_count, 0xFFFF)
        dir_size = min(dir_size, 0xFFFFFFFF)
        dir_offset = min(dir_offset, 0xFFFFFFFF)

    endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, entry_count, entry_count,
                         dir_size, dir_offset, len(self._comment))
    self.fp.write(endrec)
    self.fp.write(self._comment)
    self.fp.flush()
1814
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001815 def _fpclose(self, fp):
1816 assert self._fileRefCnt > 0
1817 self._fileRefCnt -= 1
1818 if not self._fileRefCnt and not self._filePassed:
1819 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001820
1821
class PyZipFile(ZipFile):
    """ZipFile subclass for archiving Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember *optimize*.

        *optimize* selects which compiled file is stored: -1 (the
        default) uses whatever compiled file is present, while 0, 1
        or 2 request that specific optimization level.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, its *.py files and its
        package subdirectories are added recursively.  If pathname is
        a plain directory, the *.py files directly inside it are
        added.  Otherwise pathname must be a *.py file, which is added
        on its own.  Modules are stored through _get_codename(), which
        compiles module.py into module.pyc when necessary.

        If filterfunc is given, it is called with each path about to
        be added; a false result skips that file or directory.
        """
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s "%s" skipped by filterfunc' % (label, pathname))
            return

        def add_module(path, prefix):
            # Add one *.py file found inside a directory, honouring
            # filterfunc.
            if filterfunc and not filterfunc(path):
                if self.debug:
                    print('file "%s" skipped by filterfunc' % path)
                return
            fname, arcname = self._get_codename(path[0:-3], prefix)
            if self.debug:
                print("Adding", arcname)
            self.write(fname, arcname)

        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                if os.path.splitext(filename)[1] == ".py":
                    add_module(path, basename)
            return

        # This is a package directory, add it
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)

        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in entries:
            path = os.path.join(pathname, filename)
            ext = os.path.splitext(filename)[1]
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # Nested package: recurse into it.
                    self.writepy(path, basename, filterfunc=filterfunc)
            elif ext == ".py":
                add_module(path, basename)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module path without its extension, pick the file to
        store and the name to store it under, compiling the source if
        no up-to-date compiled file exists.  If compilation fails, the
        .py source itself is returned.  The archive name is the file's
        base name, prefixed with "basename/" when basename is set.
        """
        def _compile(file, optimize=-1):
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        def _is_fresh(candidate):
            # True if candidate exists and is at least as new as the
            # source file.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)

        if self._optimize == -1:
            # legacy mode: use whatever file is present.  The legacy
            # .pyc beside the source wins, then the __pycache__ files
            # in order of optimization level; all of them are stored
            # under the legacy .pyc name.
            candidates = (file_pyc, pycache_opt0, pycache_opt1, pycache_opt2)
            for candidate in candidates:
                if _is_fresh(candidate):
                    fname = candidate
                    arcname = file_pyc
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    if sys.flags.optimize == 0:
                        fname = pycache_opt0
                    elif sys.flags.optimize == 1:
                        fname = pycache_opt1
                    else:
                        fname = pycache_opt2
                    arcname = file_pyc
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            arcname = file_pyc
            if self._optimize == 0:
                fname = pycache_opt0
            elif self._optimize == 1:
                fname = pycache_opt1
            elif self._optimize == 2:
                fname = pycache_opt2
            else:
                msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                raise ValueError(msg)

            if not _is_fresh(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py

        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001993
1994
def main(args=None):
    """Command-line entry point: list, test, extract or create a ZIP file.

    *args* is the argument list without the program name; when it is
    None, ``sys.argv[1:]`` is used instead.  The first argument selects
    the operation:

        -l ARCHIVE            print the archive listing
        -t ARCHIVE            test the archive and report a corrupted member
        -e ARCHIVE TARGET     extract every member into TARGET
        -c ARCHIVE SRC ...    create ARCHIVE from the given files/directories

    An unknown option or a wrong number of arguments prints the usage
    text and terminates through ``sys.exit(1)``.
    """
    import textwrap

    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)

    def usage_exit():
        # Shared failure path for every malformed invocation.
        print(USAGE)
        sys.exit(1)

    argv = sys.argv[1:] if args is None else args
    option = argv[0] if argv else None
    if option not in ('-l', '-c', '-e', '-t'):
        usage_exit()

    if option == '-l':
        if len(argv) != 2:
            usage_exit()
        with ZipFile(argv[1], 'r') as archive:
            archive.printdir()
        return

    if option == '-t':
        if len(argv) != 2:
            usage_exit()
        with ZipFile(argv[1], 'r') as archive:
            corrupted = archive.testzip()
        if corrupted:
            print("The following enclosed file is corrupted: {!r}".format(corrupted))
        print("Done testing")
        return

    if option == '-e':
        if len(argv) != 3:
            usage_exit()
        with ZipFile(argv[1], 'r') as archive:
            archive.extractall(argv[2])
        return

    # Only '-c' is left at this point.
    if len(argv) < 3:
        usage_exit()

    def add_to_zip(archive, fs_path, arc_path):
        # Regular files are stored deflated under their archive path.
        if os.path.isfile(fs_path):
            archive.write(fs_path, arc_path, ZIP_DEFLATED)
            return
        # Anything that is neither a file nor a directory is ignored.
        if not os.path.isdir(fs_path):
            return
        # An empty archive path means "top level": no entry is written
        # for the directory itself, only for its contents.
        if arc_path:
            archive.write(fs_path, arc_path)
        for entry in os.listdir(fs_path):
            add_to_zip(archive,
                       os.path.join(fs_path, entry),
                       os.path.join(arc_path, entry))

    with ZipFile(argv[1], 'w') as archive:
        for source in argv[2:]:
            # A trailing separator leaves an empty basename; fall back to
            # the name of the directory in that case.
            arc_root = (os.path.basename(source)
                        or os.path.basename(os.path.dirname(source)))
            if arc_root in ('', os.curdir, os.pardir):
                arc_root = ''
            add_to_zip(archive, source, arc_root)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002060
# Run the command-line interface when this file is executed as a script
# rather than imported as a module.
if __name__ == "__main__":
    main()