"""
Read and write ZIP files.

XXX references to utf-8 need further investigation.
"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Brett Cannonb57a0852013-06-15 17:32:30 -04009import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +000010import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Serhiy Storchaka9e777732015-10-10 19:43:32 +030017try:
18 import threading
19except ImportError:
20 import dummy_threading as threading
Guido van Rossum32abe6f2000-03-31 17:30:02 +000021
22try:
Tim Peterse1190062001-01-15 03:34:38 +000023 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000024 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040025except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000026 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000027 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000028
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020029try:
30 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040031except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020032 bz2 = None
33
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020034try:
35 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040036except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020037 lzma = None
38
# Names exported by ``from zipfile import *``.
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000042
class BadZipFile(Exception):
    """Raised for archives this module cannot process.

    Examples in this module: a corrupt extra field, or an archive that
    spans multiple disks.
    """


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names
54
Guido van Rossum32abe6f2000-03-31 17:30:02 +000055
# Sizes above ZIP64_LIMIT require the ZIP64 extensions.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020073
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9
98
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
125
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
144
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
166
def _check_zipfile(fp):
    """Return True if _EndRecData() finds an end-of-central-directory record.

    fp is an open, seekable binary file object.  An OSError raised while
    probing the file is treated as "not a ZIP file" and yields False.
    """
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except OSError:
        pass
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000174
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.  Anything
    with a ``read`` attribute is treated as an already-open file object;
    otherwise the argument is opened in binary mode.  Returns False when the
    file cannot be opened or read (OSError).
    """
    result = False
    try:
        if hasattr(filename, "read"):
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except OSError:
        pass
    return result
190
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is a seekable binary file, offset is the (negative) position of the
    classic end-of-central-directory record relative to the end of the file,
    and endrec is the list built by _EndRecData().  endrec is returned
    unchanged when no ZIP64 locator/record is found; otherwise its entries
    are overwritten in place with the ZIP64 values.

    Raises BadZipFile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
232
233
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    fpin must be a seekable binary file.  None is returned when the file is
    too small to hold the record or no usable record signature is found.
    """

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec = list(endrec)

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000293
Fred Drake484d7352000-10-02 21:14:52 +0000294
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named filename with the given modification time.

        date_time is a (year, month, day, hour, min, sec) sequence.
        Raises ValueError if the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The high 16 bits of external_attr hold the file mode, the low
        # 16 bits the remaining attributes.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file (local) header as a bytes object.

        zip64 selects whether a ZIP64 extra record is appended: None decides
        from the sizes (either one above ZIP64_LIMIT), a true value always
        appends it.  Raises LargeZipFile when a size exceeds ZIP64_LIMIT and
        zip64 is false.  extract_version and create_version are raised on
        the instance when ZIP64, bzip2 or LZMA require a newer version.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        ASCII names are encoded as-is; any other name is encoded as UTF-8
        and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 records found in self.extra to this entry.

        file_size, compress_size and header_offset are replaced by their
        64-bit values when they currently hold the "see ZIP64 record"
        marker.  Raises BadZipFile if a ZIP64 record is truncated, has an
        unsupported length, or carries fewer values than are needed.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln + 4 > len(extra):
                    # The record claims more payload than is present;
                    # report it instead of leaking struct.error.
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        self.file_size = counts[idx]
                        idx += 1

                    if self.compress_size == 0xFFFFFFFF:
                        self.compress_size = counts[idx]
                        idx += 1

                    if self.header_offset == 0xffffffff:
                        self.header_offset = counts[idx]
                        idx += 1
                except IndexError:
                    # Fewer 64-bit values than marker fields: corrupt record.
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln)) from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        # Guard against an arcname made only of separators (e.g. "/"),
        # which would otherwise index into an empty string.
        while arcname and arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        # endswith() is safe for an empty filename, unlike filename[-1].
        return self.filename.endswith('/')
507
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000508
Thomas Wouterscf297e42007-02-23 15:07:44 +0000509class _ZipDecrypter:
510 """Class to handle decryption of files stored within a ZIP archive.
511
512 ZIP supports a password-based form of encryption. Even though known
513 plaintext attacks have been found against it, it is still useful
Christian Heimesfdab48e2008-01-20 09:06:41 +0000514 to be able to get data out of such a file.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000515
516 Usage:
517 zd = _ZipDecrypter(mypwd)
518 plain_char = zd(cypher_char)
519 plain_text = map(zd, cypher_text)
520 """
521
522 def _GenerateCRCTable():
523 """Generate a CRC-32 table.
524
525 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
526 internal keys. We noticed that a direct implementation is faster than
527 relying on binascii.crc32().
528 """
529 poly = 0xedb88320
530 table = [0] * 256
531 for i in range(256):
532 crc = i
533 for j in range(8):
534 if crc & 1:
535 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
536 else:
537 crc = ((crc >> 1) & 0x7FFFFFFF)
538 table[i] = crc
539 return table
Daniel Holth9dee3042014-01-02 23:17:21 -0500540 crctable = None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000541
542 def _crc32(self, ch, crc):
543 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000544 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000545
546 def __init__(self, pwd):
Daniel Holth9dee3042014-01-02 23:17:21 -0500547 if _ZipDecrypter.crctable is None:
548 _ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable()
Thomas Wouterscf297e42007-02-23 15:07:44 +0000549 self.key0 = 305419896
550 self.key1 = 591751049
551 self.key2 = 878082192
552 for p in pwd:
553 self._UpdateKeys(p)
554
555 def _UpdateKeys(self, c):
556 self.key0 = self._crc32(c, self.key0)
557 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
558 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000559 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000560
561 def __call__(self, c):
562 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000563 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000564 k = self.key2 | 2
565 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000566 self._UpdateKeys(c)
567 return c
568
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200569
class LZMACompressor:
    """Compressor producing a raw LZMA1 stream prefixed by a small header.

    The header is written once, by the first call to compress() or flush(),
    and consists of the bytes 9 and 4, the 16-bit little-endian length of
    the encoded filter properties, and the properties themselves.
    """

    def __init__(self):
        # The underlying compressor is created lazily by _init().
        self._comp = None

    def _init(self):
        """Create the raw LZMA1 compressor and return the stream header."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
            lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        ])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress data, emitting the header first on the initial call."""
        if self._comp is None:
            return self._init() + self._comp.compress(data)
        return self._comp.compress(data)

    def flush(self):
        """Finish the stream, emitting the header first if nothing was written."""
        if self._comp is None:
            return self._init() + self._comp.flush()
        return self._comp.flush()
591
592
class LZMADecompressor:
    """Decompressor for a raw LZMA1 stream prefixed by a properties header.

    Input is buffered until the 4-byte header and the filter properties it
    announces have arrived; only then is the underlying decompressor built.
    """

    def __init__(self):
        self._decomp = None         # created once the header is complete
        self._unconsumed = b''      # input buffered while awaiting the header
        self.eof = False

    def decompress(self, data):
        """Return decompressed bytes for data, or b'' while still buffering."""
        if self._decomp is None:
            self._unconsumed += data
            if len(self._unconsumed) <= 4:
                return b''
            # Bytes 2-3 of the header give the size of the filter properties.
            psize, = struct.unpack('<H', self._unconsumed[2:4])
            if len(self._unconsumed) <= 4 + psize:
                return b''

            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
                lzma._decode_filter_properties(lzma.FILTER_LZMA1,
                                               self._unconsumed[4:4 + psize])
            ])
            data = self._unconsumed[4 + psize:]
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
619
620
# Human-readable names for ZIP compression method numbers; used when
# reporting a compression type.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
640
def _check_compression(compression):
    """Validate that the given compression method can be used.

    Raises RuntimeError when the module backing a known method (zlib, bz2
    or lzma) could not be imported, and NotImplementedError for any method
    other than ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2 and ZIP_LZMA.
    """
    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
    else:
        raise NotImplementedError("That compression method is not supported")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200658
659
def _get_compressor(compress_type):
    """Return a new compressor object for compress_type, or None.

    None is returned for every type other than ZIP_DEFLATED, ZIP_BZIP2 and
    ZIP_LZMA (which includes ZIP_STORED).
    """
    if compress_type == ZIP_DEFLATED:
        # A negative wbits value (-15) is passed to zlib.compressobj().
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    elif compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    elif compress_type == ZIP_LZMA:
        return LZMACompressor()
    else:
        return None
670
671
def _get_decompressor(compress_type):
    """Return a new decompressor object for compress_type.

    Returns None for ZIP_STORED.  Raises NotImplementedError for any other
    unsupported type; the message includes the method's name when it is
    listed in compressor_names.
    """
    if compress_type == ZIP_STORED:
        return None
    elif compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    elif compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    elif compress_type == ZIP_LZMA:
        return LZMADecompressor()
    else:
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
        else:
            raise NotImplementedError("compression type %d" % (compress_type,))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200687
688
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200689class _SharedFile:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300690 def __init__(self, file, pos, close, lock, writing):
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200691 self._file = file
692 self._pos = pos
693 self._close = close
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200694 self._lock = lock
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300695 self._writing = writing
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200696
697 def read(self, n=-1):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200698 with self._lock:
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300699 if self._writing():
Serhiy Storchakab0d497c2016-09-10 21:28:07 +0300700 raise ValueError("Can't read from the ZIP file while there "
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300701 "is an open writing handle on it. "
702 "Close the writing handle before trying to read.")
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200703 self._file.seek(self._pos)
704 data = self._file.read(n)
705 self._pos = self._file.tell()
706 return data
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200707
708 def close(self):
709 if self._file is not None:
710 fileobj = self._file
711 self._file = None
712 self._close(fileobj)
713
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200714# Provide the tell method for unseekable stream
715class _Tellable:
716 def __init__(self, fp):
717 self.fp = fp
718 self.offset = 0
719
720 def write(self, data):
721 n = self.fp.write(data)
722 self.offset += n
723 return n
724
725 def tell(self):
726 return self.offset
727
728 def flush(self):
729 self.fp.flush()
730
731 def close(self):
732 self.fp.close()
733
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200734
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.

    Is returned by ZipFile.open().  Data is pulled from the underlying
    file object, optionally decrypted, decompressed according to the
    member's compress_type, and checked against the member's CRC-32
    once the end of the member is reached.
    """

    # Max size supported by decompressor.
    # Parenthesized on purpose: "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Set up reading of the member described by *zipinfo*.

        fileobj       -- object with read() positioned at the member data
        mode          -- stored as the ``mode`` attribute
        zipinfo       -- supplies compress_type, compress_size, file_size,
                         filename and (optionally) CRC
        decrypter     -- per-byte callable applied to the raw data, or None
        close_fileobj -- if true, close() also closes *fileobj*
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size  # raw bytes still unread
        self._left = zipinfo.file_size               # plain bytes still owed

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0  # read position inside _readbuffer

        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            # No reference value: _update_crc() becomes a no-op.
            self._expected_crc = None

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Put the chunk back in front of whatever is still buffered so
            # the logical position is unchanged.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        # Drain the buffer, then keep reading until n more bytes arrive
        # or the member ends.
        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than asked for: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        """Fold *newdata* into the running CRC and verify it at EOF.

        Raises BadZipFile when the end of the member has been reached and
        the running CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            # Loop only until the first non-empty result.
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    # Surplus stays buffered for the next call.
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         (self._compress_left <= 0 and
                          not self._decompressor.unconsumed_tail))
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the member's declared size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        """Read raw member bytes from the file object and decrypt them.

        Reads at least MIN_READ_SIZE bytes when that many remain, and
        never past the member's compressed data.  Raises EOFError if the
        file object returns no data while some is still expected.
        """
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = bytes(map(self._decrypter, data))
        return data

    def close(self):
        """Close the member; also close the file object if it is owned."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000962
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000963
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +0300964class _ZipWriteFile(io.BufferedIOBase):
965 def __init__(self, zf, zinfo, zip64):
966 self._zinfo = zinfo
967 self._zip64 = zip64
968 self._zipfile = zf
969 self._compressor = _get_compressor(zinfo.compress_type)
970 self._file_size = 0
971 self._compress_size = 0
972 self._crc = 0
973
974 @property
975 def _fileobj(self):
976 return self._zipfile.fp
977
978 def writable(self):
979 return True
980
981 def write(self, data):
982 nbytes = len(data)
983 self._file_size += nbytes
984 self._crc = crc32(data, self._crc)
985 if self._compressor:
986 data = self._compressor.compress(data)
987 self._compress_size += len(data)
988 self._fileobj.write(data)
989 return nbytes
990
991 def close(self):
992 super().close()
993 # Flush any data from the compressor, and update header info
994 if self._compressor:
995 buf = self._compressor.flush()
996 self._compress_size += len(buf)
997 self._fileobj.write(buf)
998 self._zinfo.compress_size = self._compress_size
999 else:
1000 self._zinfo.compress_size = self._file_size
1001 self._zinfo.CRC = self._crc
1002 self._zinfo.file_size = self._file_size
1003
1004 # Write updated header info
1005 if self._zinfo.flag_bits & 0x08:
1006 # Write CRC and file sizes after the file data
1007 fmt = '<LQQ' if self._zip64 else '<LLL'
1008 self._fileobj.write(struct.pack(fmt, self._zinfo.CRC,
1009 self._zinfo.compress_size, self._zinfo.file_size))
1010 self._zipfile.start_dir = self._fileobj.tell()
1011 else:
1012 if not self._zip64:
1013 if self._file_size > ZIP64_LIMIT:
1014 raise RuntimeError('File size unexpectedly exceeded ZIP64 '
1015 'limit')
1016 if self._compress_size > ZIP64_LIMIT:
1017 raise RuntimeError('Compressed size unexpectedly exceeded '
1018 'ZIP64 limit')
1019 # Seek backwards and write file header (which will now include
1020 # correct CRC and file sizes)
1021
1022 # Preserve current position in file
1023 self._zipfile.start_dir = self._fileobj.tell()
1024 self._fileobj.seek(self._zinfo.header_offset)
1025 self._fileobj.write(self._zinfo.FileHeader(self._zip64))
1026 self._fileobj.seek(self._zipfile.start_dir)
1027
1028 self._zipfile._writing = False
1029
1030 # Successfully written: Add file to our caches
1031 self._zipfile.filelist.append(self._zinfo)
1032 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
1033
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001034class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001035 """ Class with methods to open, read, write, close, list zip files.
1036
Serhiy Storchaka235c5e02013-11-23 15:55:38 +02001037 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +00001038
Fred Drake3d9091e2001-03-26 15:49:24 +00001039 file: Either the path to the file, or a file-like object.
1040 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001041 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1042 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001043 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1044 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001045 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1046 needed, otherwise it will raise an exception when this would
1047 be necessary.
1048
Fred Drake3d9091e2001-03-26 15:49:24 +00001049 """
Fred Drake484d7352000-10-02 21:14:52 +00001050
Fred Drake90eac282001-02-28 05:29:34 +00001051 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001052 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001053
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
    """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
    or append 'a'.

    file may be a path (str or os.PathLike) or a file-like object.  A
    path is opened here; a file-like object is used as given.

    Raises ValueError for an unknown mode.  Any error while reading or
    positioning the archive releases the file via _fpclose() and is
    re-raised.
    """
    if mode not in ('r', 'w', 'x', 'a'):
        raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = mode
    self.pwd = None
    self._comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, os.PathLike):
        # Path-like objects are paths too, not file objects.
        file = os.fspath(file)
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        # The first four keys map the ZipFile mode to an open() mode; the
        # remaining keys give the next mode to try when open() fails with
        # OSError (e.g. 'a' -> 'r+b' -> 'w+b' -> 'wb').
        modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                    'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
        filemode = modeDict[mode]
        while True:
            try:
                self.fp = io.open(file, filemode)
            except OSError:
                if filemode in modeDict:
                    filemode = modeDict[filemode]
                    continue
                raise
            break
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    self._fileRefCnt = 1
    self._lock = threading.RLock()
    self._seekable = True
    self._writing = False

    try:
        if mode == 'r':
            self._RealGetContents()
        elif mode in ('w', 'x'):
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
            self._start_disk = 0
            try:
                self.start_dir = self.fp.tell()
            except (AttributeError, OSError):
                # No usable tell(): count written bytes ourselves.
                self.fp = _Tellable(self.fp)
                self.start_dir = 0
                self._seekable = False
            else:
                # Some file-like objects can provide tell() but not seek()
                try:
                    self.fp.seek(self.start_dir)
                except (AttributeError, OSError):
                    self._seekable = False
        elif mode == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                self.start_dir = self._start_disk = self.fp.tell()
        else:
            raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
    except:
        # Deliberately bare: release the file on any failure, including
        # KeyboardInterrupt, then let the exception continue.
        fp = self.fp
        self.fp = None
        self._fpclose(fp)
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001139
Ezio Melottifaa6b7f2009-12-30 12:34:59 +00001140 def __enter__(self):
1141 return self
1142
1143 def __exit__(self, type, value, traceback):
1144 self.close()
1145
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001146 def __repr__(self):
1147 result = ['<%s.%s' % (self.__class__.__module__,
1148 self.__class__.__qualname__)]
1149 if self.fp is not None:
1150 if self._filePassed:
1151 result.append(' file=%r' % self.fp)
1152 elif self.filename is not None:
1153 result.append(' filename=%r' % self.filename)
1154 result.append(' mode=%r' % self.mode)
1155 else:
1156 result.append(' [closed]')
1157 result.append('>')
1158 return ''.join(result)
1159
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record, reads the central
    directory and fills self.filelist / self.NameToInfo with one ZipInfo
    per entry.  Also sets self._comment, self._start_disk and
    self.start_dir.

    Raises BadZipFile when no end record is found, when the central
    directory offset is invalid, or when the directory is truncated or
    has a bad signature.  Raises NotImplementedError for entries needing
    an extract version above MAX_EXTRACT_VERSION.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # self._start_disk:  Position of the start of ZIP archive
    # It is zero, unless ZIP was concatenated to another file
    self._start_disk = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        self._start_disk -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = self._start_disk + offset_cd
        print("given, inferred, offset", offset_cd, inferred, self._start_disk)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + self._start_disk
    if self.start_dir < 0:
        # Inconsistent sizes in a corrupt end record; report it as a bad
        # archive instead of letting seek() fail on a negative offset.
        raise BadZipFile("Bad offset for central directory")
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory from here on.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if len(centdir) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Field 5 is the general-purpose flag word (unpacked again below
        # as x.flag_bits); it is needed now to pick the name encoding.
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                        t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        # Stored offsets are relative to the archive start; make absolute.
        x.header_offset = x.header_offset + self._start_disk
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001237
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001238
def namelist(self):
    """Return the member names of the archive as a new list."""
    return [info.filename for info in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001242
def infolist(self):
    """Return the archive's list of ZipInfo instances.

    This is the internal list itself, not a copy.
    """
    return self.filelist
1247
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header line, then name, modification time and size per member.
    Output goes to *file* (print()'s default when None).
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001256
1257 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001258 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001259 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001260 for zinfo in self.filelist:
1261 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001262 # Read by chunks, to avoid an OverflowError or a
1263 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001264 with self.open(zinfo.filename, "r") as f:
1265 while f.read(chunk_size): # Check CRC-32
1266 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001267 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001268 return zinfo.filename
1269
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when the archive has no member of that name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001278
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A false value (None, b'') clears the default.  Raises TypeError for
    a non-empty value that is not bytes.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd or None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001287
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Only bytes are accepted; anything else is a caller error.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
    # Over-long comments are cut to ZIP_MAX_COMMENT bytes, with a warning
    # attributed to the caller's line (stacklevel=2).
    too_long = len(comment) > ZIP_MAX_COMMENT
    if too_long:
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT, stacklevel=2)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    # Mark the archive as changed.
    self._didModify = True
1305
def read(self, name, pwd=None):
    """Return the complete contents of member *name* as bytes.

    name and pwd are passed on to open() with mode "r".
    """
    with self.open(name, "r", pwd) as member:
        payload = member.read()
    return payload
Guido van Rossumd8faa362007-04-27 19:54:29 +00001310
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Return file-like object for 'name'.

    name is a string for the file name within the ZIP file, or a ZipInfo
    object.

    mode should be 'r' to read a file already in the ZIP file, or 'w' to
    write to a file newly added to the archive.

    pwd is the password to decrypt files (only used for reading).

    When writing, if the file size is not known in advance but may exceed
    2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
    files.  If the size is known in advance, it is best to pass a ZipInfo
    instance for name, with zinfo.file_size set.

    Raises ValueError for an unsupported mode, for a password combined
    with mode 'w', for a closed archive, and for a read attempted while a
    write handle is open; TypeError if pwd is not bytes; BadZipFile if the
    member's local header or encryption header is truncated or disagrees
    with the central directory; NotImplementedError for flag bits 5 and 6;
    RuntimeError if a required password is missing or wrong.
    """
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    if pwd and (mode == "w"):
        raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    # Make sure we have an info object
    if isinstance(name, ZipInfo):
        # 'name' is already an info object
        zinfo = name
    elif mode == 'w':
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
    else:
        # Get info object for name
        zinfo = self.getinfo(name)

    if mode == 'w':
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                         "is an open writing handle on it. "
                         "Close the writing handle before trying to read.")

    # Open for reading.  The reference taken here is released through
    # self._fpclose, which is handed to the shared-file wrapper below.
    self._fileRefCnt += 1
    zef_file = _SharedFile(self.fp, zinfo.header_offset,
                           self._fpclose, self._lock, lambda: self._writing)
    try:
        # Read and validate the fixed-size part of the local file header.
        fheader = zef_file.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, fheader)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            # The extra field is only skipped, its content is not used.
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        # Flag bit 0x800 marks a UTF-8 file name; otherwise cp437 is used.
        encoding = "utf-8" if zinfo.flag_bits & 0x800 else "cp437"
        fname_str = fname.decode(encoding)

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            # Fall back to the archive-wide password when none was given.
            pwd = pwd or self.pwd
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            if len(header) != 12:
                # Without this check a short read would surface as an
                # IndexError on the check byte below instead of the
                # BadZipFile used for every other truncation.
                raise BadZipFile("Truncated encryption header")
            h = list(map(zd, header))
            if zinfo.flag_bits & 0x8:
                # compare against the file time from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file %r" % name)

        return ZipExtFile(zef_file, mode, zinfo, zd, True)
    except BaseException:
        # Release the shared-file wrapper on any failure, then propagate.
        zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001423
def _open_to_write(self, zinfo, force_zip64=False):
    """Write the local header for 'zinfo' and return a write handle.

    Resets the size/CRC/flag fields of 'zinfo', positions self.fp at
    self.start_dir when the archive is seekable, writes the file header
    and marks the archive as having an open writer (self._writing).

    Raises ValueError if force_zip64 is requested while ZIP64 is
    disallowed, or if another write handle is already open.
    """
    if force_zip64 and not self._allowZip64:
        raise ValueError(
            "force_zip64 is True, but allowZip64 was False when opening "
            "the ZIP file."
        )
    if self._writing:
        raise ValueError("Can't write to the ZIP file while there is "
                         "another write handle open on it. "
                         "Close the first handle before opening another.")

    # Sizes and CRC are overwritten with correct data after processing
    # the file; a caller-provided file_size is kept as a size hint.
    if not hasattr(zinfo, 'file_size'):
        zinfo.file_size = 0
    zinfo.compress_size = zinfo.CRC = 0

    flags = 0x00
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        flags |= 0x02
    if not self._seekable:
        flags |= 0x08
    zinfo.flag_bits = flags

    if not zinfo.external_attr:
        zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

    # Compressed size can be larger than uncompressed size, hence the
    # 5% safety margin on the size hint.
    zip64 = self._allowZip64 and (
        force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)

    if self._seekable:
        self.fp.seek(self.start_dir)
    zinfo.header_offset = self.fp.tell()

    self._writecheck(zinfo)
    self._didModify = True

    self.fp.write(zinfo.FileHeader(zip64))

    self._writing = True
    return _ZipWriteFile(self, zinfo, zip64)
1466
def extract(self, member, path=None, pwd=None):
    """Extract one member of the archive and return the path written.

    'member' is either a file name (looked up with getinfo()) or a
    ZipInfo object.  The member is extracted under its full name below
    'path', which defaults to the current working directory.  'pwd' is
    passed through to _extract_member().
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
1480
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members of the archive via extract().

    'members', when given, is iterated as-is and should be a subset of
    what namelist() returns; when it is None every name from namelist()
    is extracted.  'path' and 'pwd' are forwarded unchanged to extract()
    for each member.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
1492
@classmethod
def _sanitize_windows_name(cls, arcname, pathsep):
    """Replace bad characters and remove trailing dots from parts.

    Each of the characters :<>|"?* is replaced by an underscore, every
    component between 'pathsep' separators loses its trailing dots, and
    components that end up empty are dropped.  The translation table is
    built on first use and cached on the class.
    """
    table = cls._windows_illegal_name_trans_table
    if not table:
        illegal = ':<>|"?*'
        table = str.maketrans(illegal, '_' * len(illegal))
        cls._windows_illegal_name_trans_table = table
    cleaned = arcname.translate(table)
    parts = [part.rstrip('.') for part in cleaned.split(pathsep)]
    # filter(None, ...) discards the components that became empty.
    return pathsep.join(filter(None, parts))
1507
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' below the directory
    'targetpath' and return the normalized path that was written.

    The archive name is made relative first: drive/UNC prefixes, empty
    components, "." and ".." are removed.  Missing parent directories
    are created.  Directory members only create the directory; other
    members are copied from self.open(member, pwd=pwd).
    """
    sep = os.path.sep

    # Build the destination pathname, replacing forward slashes (and the
    # alternative separator, if the platform has one) with os.path.sep.
    arcname = member.filename.replace('/', sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, sep)

    # Interpret an absolute pathname as relative: remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    skipped = ('', os.path.curdir, os.path.pardir)
    kept = [part for part in arcname.split(sep) if part not in skipped]
    arcname = sep.join(kept)
    if sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, sep)

    destination = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    parent = os.path.dirname(destination)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if member.is_dir():
        if not os.path.isdir(destination):
            os.mkdir(destination)
        return destination

    with self.open(member, pwd=pwd) as source:
        with open(destination, "wb") as target:
            shutil.copyfileobj(source, target)

    return destination
1546
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Warns about a duplicate member name, then raises ValueError if the
    archive was not opened in mode 'w', 'x' or 'a' or is already closed.
    After validating the compression type, raises LargeZipFile when ZIP64
    is disallowed and the entry count, the file size or the header offset
    would need it.
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ('w', 'x', 'a'):
        raise ValueError("write() requires mode 'w', 'x', or 'a'")
    if not self.fp:
        raise ValueError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)

    if self._allowZip64:
        # No size limits to enforce when ZIP64 extensions may be used.
        return

    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        reason = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        reason = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        reason = "Zipfile size"
    else:
        return
    raise LargeZipFile(reason + " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001569
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    The entry is described by ZipInfo.from_file(filename, arcname).  A
    directory is recorded as a header-only entry; a regular file is
    streamed into the archive through self.open(zinfo, 'w'), compressed
    with 'compress_type' or, when that is None, with self.compression.

    Raises ValueError if the archive is closed or a write handle is
    still open.
    """
    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname)

    if not zinfo.is_dir():
        zinfo.compress_type = (
            self.compression if compress_type is None else compress_type)
        with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
            shutil.copyfileobj(src, dest, 1024*8)
        return

    # Directory entry: no data follows the header.
    zinfo.compress_size = 0
    zinfo.CRC = 0
    with self._lock:
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        # The central directory will start right after this header.
        self.start_dir = self.fp.tell()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001611
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo is created with the current
    local time and self.compression; a name ending in '/' is marked as a
    directory.  'compress_type', if not None, overrides the compression
    type of the entry.

    Raises ValueError if the archive is closed or a write handle is
    still open.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        # endswith() instead of indexing [-1]: an empty archive name is
        # then handled as a regular file rather than raising IndexError.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    zinfo.file_size = len(data)            # Uncompressed size
    with self._lock:
        with self.open(zinfo, mode='w') as dest:
            dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001647
def __del__(self):
    """Finalizer: delegate to close() in case the user never called it."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001651
def close(self):
    """Close the file, and for mode 'w', 'x' and 'a' write the ending
    records.

    Does nothing if the archive is already closed.  Raises ValueError
    while a write handle is still open.  The ending records are written
    only if the archive was modified; the underlying file is released
    through self._fpclose() even when writing them fails.
    """
    if self.fp is None:
        return

    if self._writing:
        raise ValueError("Can't close the ZIP file while there is "
                         "an open writing handle on it. "
                         "Close the writing handle before closing the zip.")

    try:
        needs_end_records = self.mode in ('w', 'x', 'a') and self._didModify
        if needs_end_records:
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        # Mark the archive closed before releasing the file object.
        fp, self.fp = self.fp, None
        self._fpclose(fp)
1673
def _write_end_record(self):
    """Write the central directory and the end-of-archive records.

    Emits one central directory record per entry of self.filelist at the
    current position of self.fp, followed, when required, by the ZIP64
    end-of-archive record and locator, then the regular end-of-archive
    record and the archive comment.  Finally flushes self.fp.

    Raises LargeZipFile if the end-of-archive record needs ZIP64
    extensions while self._allowZip64 is false.
    """
    for zinfo in self.filelist:         # write central directory
        # date_time is (year, month, day, hour, minute, second); pack it
        # into the two bit fields below (seconds stored halved).
        dt = zinfo.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        # Values that overflow their fixed-size field are collected in
        # 'extra' and replaced by 0xffffffff in the record itself.
        extra = []
        if zinfo.file_size > ZIP64_LIMIT \
           or zinfo.compress_size > ZIP64_LIMIT:
            extra.append(zinfo.file_size)
            extra.append(zinfo.compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
        else:
            file_size = zinfo.file_size
            compress_size = zinfo.compress_size

        # Offsets are stored relative to self._start_disk.
        header_offset = zinfo.header_offset - self._start_disk
        if header_offset > ZIP64_LIMIT:
            extra.append(header_offset)
            header_offset = 0xffffffff

        extra_data = zinfo.extra
        min_version = 0
        if extra:
            # Prepend a ZIP64 field (header id 1, 8 bytes per value) to
            # the entry's own extra data.
            extra_data = struct.pack(
                '<HH' + 'Q'*len(extra),
                1, 8*len(extra), *extra) + extra_data

            min_version = ZIP64_VERSION

        # Some compression methods raise the minimum version as well.
        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
        except DeprecationWarning:
            # Debugging aid: dump the values that were being packed to
            # stderr, then re-raise.
            # NOTE(review): presumably only reachable when warnings are
            # turned into errors -- confirm.
            print((structCentralDir, stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset), file=sys.stderr)
            raise
        # Record layout: fixed part, file name, extra data, comment.
        self.fp.write(centdir)
        self.fp.write(filename)
        self.fp.write(extra_data)
        self.fp.write(zinfo.comment)

    # pos2 is the position right after the last central directory record.
    pos2 = self.fp.tell()
    # Write end-of-zip-archive record
    centDirCount = len(self.filelist)
    centDirSize = pos2 - self.start_dir
    centDirOffset = self.start_dir - self._start_disk
    requires_zip64 = None
    if centDirCount > ZIP_FILECOUNT_LIMIT:
        requires_zip64 = "Files count"
    elif centDirOffset > ZIP64_LIMIT:
        requires_zip64 = "Central directory offset"
    elif centDirSize > ZIP64_LIMIT:
        requires_zip64 = "Central directory size"
    if requires_zip64:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
        zip64endrec = struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, centDirCount, centDirCount,
            centDirSize, centDirOffset)
        self.fp.write(zip64endrec)

        # The locator stores pos2, i.e. where the ZIP64 end record
        # written just above begins.
        zip64locrec = struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, pos2, 1)
        self.fp.write(zip64locrec)
        # Clamp the values to what fits into the regular end record.
        centDirCount = min(centDirCount, 0xFFFF)
        centDirSize = min(centDirSize, 0xFFFFFFFF)
        centDirOffset = min(centDirOffset, 0xFFFFFFFF)

    endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, centDirCount, centDirCount,
                         centDirSize, centDirOffset, len(self._comment))
    self.fp.write(endrec)
    self.fp.write(self._comment)
    self.fp.flush()
1773
def _fpclose(self, fp):
    """Drop one reference to the archive's file object.

    'fp' is closed once the reference count reaches zero, unless
    self._filePassed is set, in which case it is left open.
    """
    assert self._fileRefCnt > 0
    self._fileRefCnt -= 1
    if self._fileRefCnt or self._filePassed:
        return
    fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001779
1780
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember 'optimize'.

        'optimize' selects the bytecode variant used by _get_codename():
        -1 accepts whichever up-to-date compiled file is present, while
        0, 1 and 2 request that specific optimization level.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.
        If filterfunc(pathname) is given, it is called with every argument.
        When it is False, the file or directory is skipped.
        """
        def vetoed(path):
            # True when filterfunc rejects the module file 'path'.
            if filterfunc and not filterfunc(path):
                if self.debug:
                    print('file %r skipped by filterfunc' % path)
                return True
            return False

        def add_module(source, prefix, verb="Adding"):
            # 'source' ends in ".py"; strip the suffix for _get_codename().
            fname, arcname = self._get_codename(source[0:-3], prefix)
            if self.debug:
                print(verb, arcname)
            self.write(fname, arcname)

        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s %r skipped by filterfunc' % (label, pathname))
            return

        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            add_module(pathname, basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                if os.path.splitext(filename)[1] != ".py":
                    continue
                if vetoed(path):
                    continue
                add_module(path, basename)
            return

        # This is a package directory, add it
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        add_module(initname, basename)
        dirlist = os.listdir(pathname)
        dirlist.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename,
                                 filterfunc=filterfunc)  # Recursive call
            elif os.path.splitext(filename)[1] == ".py":
                if not vetoed(path):
                    add_module(path, basename)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).
        If compilation fails, the .py source itself is returned.
        """
        def _compile(file, optimize=-1):
            # Byte-compile 'file'; print the error and return False on
            # failure.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        def _is_fresh(candidate):
            # True when 'candidate' exists and is at least as new as the
            # source file.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
        cached = (pycache_opt0, pycache_opt1, pycache_opt2)

        # Whatever compiled file is picked, it is written to the archive
        # under the legacy pyc file name.
        arcname = file_pyc
        if self._optimize == -1:
            # legacy mode: use whatever file is present, preferring the
            # legacy .pyc, then the __pycache__ files by optimization level
            for candidate in (file_pyc,) + cached:
                if _is_fresh(candidate):
                    fname = candidate
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    if sys.flags.optimize == 0:
                        fname = pycache_opt0
                    elif sys.flags.optimize == 1:
                        fname = pycache_opt1
                    else:
                        fname = pycache_opt2
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            for level, candidate in enumerate(cached):
                if self._optimize == level:
                    fname = candidate
                    break
            else:
                msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                raise ValueError(msg)
            if not _is_fresh(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py

        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001952
1953
def main(args=None):
    """Run the minimal command-line interface of this module.

    *args* is the list of command-line arguments without the program
    name; when it is None, ``sys.argv[1:]`` is used.

    Supported invocations (see the usage text below):
        -l zipfile.zip          list the archive contents
        -t zipfile.zip          test the archive and report the first
                                corrupted member, if any
        -e zipfile.zip target   extract the whole archive into *target*
        -c zipfile.zip src ...  create an archive from the given sources

    An unknown option or a wrong number of arguments prints the usage
    text and terminates via ``sys.exit(1)``.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Recursively add *path* to the open archive *zf* under the
            # archive name *zippath*.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                # An empty zippath means the directory itself gets no
                # entry; only its contents are added.
                if zippath:
                    zf.write(path, zippath)
                # os.listdir() returns names in arbitrary order; sort them
                # so the member order of the archive is reproducible.
                for nm in sorted(os.listdir(path)):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w') as zf:
            for path in args[2:]:
                zippath = os.path.basename(path)
                if not zippath:
                    # basename is empty when path ends with a separator
                    # ("dir/"); use the last real component instead.
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    # Do not create archive entries named "." or "..".
                    zippath = ''
                addToZip(zf, path, zippath)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002019
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()