blob: cc15ed3f4aa5dfa876bbd7b500882a621a6186fb [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Brett Cannonb57a0852013-06-15 17:32:30 -04009import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +000010import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
18try:
Tim Peterse1190062001-01-15 03:34:38 +000019 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000020 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040021except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000022 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000023 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020025try:
26 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040027except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020028 bz2 = None
29
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020030try:
31 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040032except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020033 lzma = None
34
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020035__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020036 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Georg Brandl4d540882010-10-28 06:42:33 +000037 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000038
class BadZipFile(Exception):
    """Error raised for ZIP archives that this module cannot process."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041
42
class LargeZipFile(Exception):
    """Signal that a zipfile being written needs ZIP64 extensions.

    Raised when writing a zipfile that requires ZIP64 extensions while
    those extensions are disabled.
    """
48
Georg Brandl4d540882010-10-28 06:42:33 +000049error = BadZipfile = BadZipFile # Pre-3.2 compatibility names
50
Guido van Rossum32abe6f2000-03-31 17:30:02 +000051
# Sizes above ZIP64_LIMIT need the ZIP64 extensions (largest signed 32-bit
# value); the other two limits are the largest unsigned 16-bit value.
ZIP64_LIMIT = 0x7FFFFFFF
ZIP_FILECOUNT_LIMIT = 0xFFFF
ZIP_MAX_COMMENT = 0xFFFF

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# Version numbers written into the "create"/"extract" header fields
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020069
Martin v. Löwisb09b8442008-07-03 14:13:42 +000070# Below are some formats and associated data for reading/writing headers using
71# the struct module. The names and structures of headers/records are those used
72# in the PKWARE description of the ZIP file format:
73# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
74# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000075
Martin v. Löwisb09b8442008-07-03 14:13:42 +000076# The "end of central directory" structure, magic number, size, and indices
77# (section V.I in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +000078structEndArchive = b"<4s4H2LH"
79stringEndArchive = b"PK\005\006"
80sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +000081
82_ECD_SIGNATURE = 0
83_ECD_DISK_NUMBER = 1
84_ECD_DISK_START = 2
85_ECD_ENTRIES_THIS_DISK = 3
86_ECD_ENTRIES_TOTAL = 4
87_ECD_SIZE = 5
88_ECD_OFFSET = 6
89_ECD_COMMENT_SIZE = 7
90# These last two indices are not part of the structure as defined in the
91# spec, but they are used internally by this module as a convenience
92_ECD_COMMENT = 8
93_ECD_LOCATION = 9
94
95# The "central directory" structure, magic number, size, and indices
96# of entries in the structure (section V.F in the format document)
97structCentralDir = "<4s4B4HL2L5H2L"
Georg Brandl2ee470f2008-07-16 12:55:28 +000098stringCentralDir = b"PK\001\002"
Martin v. Löwisb09b8442008-07-03 14:13:42 +000099sizeCentralDir = struct.calcsize(structCentralDir)
100
Fred Drake3e038e52001-02-28 17:56:26 +0000101# indexes of entries in the central directory structure
102_CD_SIGNATURE = 0
103_CD_CREATE_VERSION = 1
104_CD_CREATE_SYSTEM = 2
105_CD_EXTRACT_VERSION = 3
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000106_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +0000107_CD_FLAG_BITS = 5
108_CD_COMPRESS_TYPE = 6
109_CD_TIME = 7
110_CD_DATE = 8
111_CD_CRC = 9
112_CD_COMPRESSED_SIZE = 10
113_CD_UNCOMPRESSED_SIZE = 11
114_CD_FILENAME_LENGTH = 12
115_CD_EXTRA_FIELD_LENGTH = 13
116_CD_COMMENT_LENGTH = 14
117_CD_DISK_NUMBER_START = 15
118_CD_INTERNAL_FILE_ATTRIBUTES = 16
119_CD_EXTERNAL_FILE_ATTRIBUTES = 17
120_CD_LOCAL_HEADER_OFFSET = 18
121
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000122# The "local file header" structure, magic number, size, and indices
123# (section V.A in the format document)
124structFileHeader = "<4s2B4HL2L2H"
Georg Brandl2ee470f2008-07-16 12:55:28 +0000125stringFileHeader = b"PK\003\004"
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000126sizeFileHeader = struct.calcsize(structFileHeader)
127
Fred Drake3e038e52001-02-28 17:56:26 +0000128_FH_SIGNATURE = 0
129_FH_EXTRACT_VERSION = 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000130_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000131_FH_GENERAL_PURPOSE_FLAG_BITS = 3
132_FH_COMPRESSION_METHOD = 4
133_FH_LAST_MOD_TIME = 5
134_FH_LAST_MOD_DATE = 6
135_FH_CRC = 7
136_FH_COMPRESSED_SIZE = 8
137_FH_UNCOMPRESSED_SIZE = 9
138_FH_FILENAME_LENGTH = 10
139_FH_EXTRA_FIELD_LENGTH = 11
140
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000141# The "Zip64 end of central directory locator" structure, magic number, and size
Georg Brandl2ee470f2008-07-16 12:55:28 +0000142structEndArchive64Locator = "<4sLQL"
143stringEndArchive64Locator = b"PK\x06\x07"
144sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000145
146# The "Zip64 end of central directory" record, magic number, size, and indices
147# (section V.G in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000148structEndArchive64 = "<4sQ2H2L4Q"
149stringEndArchive64 = b"PK\x06\x06"
150sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000151
152_CD64_SIGNATURE = 0
153_CD64_DIRECTORY_RECSIZE = 1
154_CD64_CREATE_VERSION = 2
155_CD64_EXTRACT_VERSION = 3
156_CD64_DISK_NUMBER = 4
157_CD64_DISK_NUMBER_START = 5
158_CD64_NUMBER_ENTRIES_THIS_DISK = 6
159_CD64_NUMBER_ENTRIES_TOTAL = 7
160_CD64_DIRECTORY_SIZE = 8
161_CD64_OFFSET_START_CENTDIR = 9
162
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An OSError raised while probing the file is swallowed and reported as
    False.
    """
    try:
        found = bool(_EndRecData(fp))   # truthy => correct magic number
    except OSError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000170
def is_zipfile(filename):
    """Quickly check whether *filename* is a ZIP file via its magic number.

    *filename* may be a path, or an object with a ``read`` attribute, in
    which case it is probed directly instead of being opened.  Returns
    False when the file cannot be opened or read (OSError).
    """
    is_zip = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves so it is closed again afterwards.
            with open(filename, "rb") as stream:
                is_zip = _check_zipfile(stream)
        else:
            is_zip = _check_zipfile(fp=filename)
    except OSError:
        pass
    return is_zip
186
def _EndRecData64(fpin, offset, endrec):
    """Update *endrec* from the ZIP64 end-of-archive records, if present.

    *offset* is relative to the end of the file (it is used with
    ``seek(..., 2)``) and marks where the ZIP64 locator is expected to end.
    *endrec* is returned unchanged when the locator or the ZIP64 record
    cannot be read in full or does not carry the expected signature.

    Raises BadZipFile if the locator describes a multi-disk archive.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # The file is too small to hold a ZIP64 locator: keep the record
        # we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator,
                                               locator)
    if sig != stringEndArchive64Locator:
        return endrec
    if not (diskno == 0 and disks == 1):
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data': the record sits directly before
    # the locator.
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    (sig, sz, create_version, read_version, disk_num, disk_dir,
     count_here, count_total, dir_size, dir_offset) = struct.unpack(
        structEndArchive64, record)
    if sig != stringEndArchive64:
        return endrec

    # Overwrite the classic fields with their ZIP64 counterparts.
    for index, value in (
            (_ECD_SIGNATURE, sig),
            (_ECD_DISK_NUMBER, disk_num),
            (_ECD_DISK_START, disk_dir),
            (_ECD_ENTRIES_THIS_DISK, count_here),
            (_ECD_ENTRIES_TOTAL, count_total),
            (_ECD_SIZE, dir_size),
            (_ECD_OFFSET, dir_offset)):
        endrec[index] = value
    return endrec
228
229
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list: the unpacked fields of the record, then the
    archive comment (bytes), then the file offset at which the record
    starts.  The list is passed through _EndRecData64() before it is
    returned.  None is returned when no record can be located.
    """
    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: an archive without a comment ends with the record itself.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    if (len(tail) == sizeEndCentDir
            and tail[:4] == stringEndArchive
            and tail[-2:] == b"\000\000"):
        # Signature matches and the comment length is zero.
        endrec = list(struct.unpack(structEndArchive, tail))
        # Blank comment, then the offset where the record starts.
        endrec += [b"", filesize - sizeEndCentDir]
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it has an archive comment.  The
    # comment is the last item in the file and may be up to 64K long, so
    # search that much of the tail for the record signature.  It is assumed
    # that the signature does not occur inside the comment.
    search_start = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(search_start, 0)
    window = fpin.read()
    sig_at = window.rfind(stringEndArchive)
    if sig_at < 0:
        # Unable to find a valid end of central directory structure
        return None

    record_end = sig_at + sizeEndCentDir
    raw = window[sig_at:record_end]
    if len(raw) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, raw))
    comment_len = endrec[_ECD_COMMENT_SIZE]     # as claimed by the zip file
    endrec.append(window[record_end:record_end + comment_len])
    endrec.append(search_start + sig_at)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, search_start + sig_at - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000289
Fred Drake484d7352000-10-02 21:14:52 +0000290
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for *filename* with default header values.

        *filename* is cut at the first NUL character and, on platforms
        whose os.sep is not "/", has os.sep replaced by "/".  *date_time*
        is a (year, month, day, hour, min, sec) sequence.

        Raises ValueError if the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        The result is the packed local file header followed by the encoded
        file name and the extra field.

        zip64 -- True to always append a ZIP64 extra record, False to
                 never append one, None (default) to append one only when
                 a size exceeds ZIP64_LIMIT.

        Raises LargeZipFile if a size exceeds ZIP64_LIMIT while zip64 is
        false.  As a side effect, extract_version and create_version are
        raised to the minimum version the header requires.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # Extra record: type 1, payload length, then both 64-bit sizes.
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4,
                                        file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        The name is encoded as ASCII when possible; otherwise it is encoded
        as UTF-8 and bit 0x800 is added to the returned flag bits.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 values stored in the extra field to this entry.

        The extra field is walked record by record (2-byte type, 2-byte
        length, payload).  For a record of type 1, the 64-bit values it
        carries replace, in this order, file_size, compress_size and
        header_offset -- but only those that currently hold the 0xffffffff
        placeholder (file_size is also replaced when it holds
        0xffffffffffffffff).

        Raises RuntimeError if a type 1 record has a length other than 0,
        8, 16 or at least 24.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            # Skip this record's 4-byte header and its payload.
            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000445
446
Thomas Wouterscf297e42007-02-23 15:07:44 +0000447class _ZipDecrypter:
448 """Class to handle decryption of files stored within a ZIP archive.
449
450 ZIP supports a password-based form of encryption. Even though known
451 plaintext attacks have been found against it, it is still useful
Christian Heimesfdab48e2008-01-20 09:06:41 +0000452 to be able to get data out of such a file.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000453
454 Usage:
455 zd = _ZipDecrypter(mypwd)
456 plain_char = zd(cypher_char)
457 plain_text = map(zd, cypher_text)
458 """
459
460 def _GenerateCRCTable():
461 """Generate a CRC-32 table.
462
463 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
464 internal keys. We noticed that a direct implementation is faster than
465 relying on binascii.crc32().
466 """
467 poly = 0xedb88320
468 table = [0] * 256
469 for i in range(256):
470 crc = i
471 for j in range(8):
472 if crc & 1:
473 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
474 else:
475 crc = ((crc >> 1) & 0x7FFFFFFF)
476 table[i] = crc
477 return table
Daniel Holth9dee3042014-01-02 23:17:21 -0500478 crctable = None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000479
480 def _crc32(self, ch, crc):
481 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000482 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000483
484 def __init__(self, pwd):
Daniel Holth9dee3042014-01-02 23:17:21 -0500485 if _ZipDecrypter.crctable is None:
486 _ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable()
Thomas Wouterscf297e42007-02-23 15:07:44 +0000487 self.key0 = 305419896
488 self.key1 = 591751049
489 self.key2 = 878082192
490 for p in pwd:
491 self._UpdateKeys(p)
492
493 def _UpdateKeys(self, c):
494 self.key0 = self._crc32(c, self.key0)
495 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
496 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000497 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000498
499 def __call__(self, c):
500 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000501 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000502 k = self.key2 | 2
503 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000504 self._UpdateKeys(c)
505 return c
506
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200507
class LZMACompressor:
    """Raw LZMA1 compressor whose output starts with a properties header.

    The first chunk of output is prefixed with a 4-byte header (the bytes
    9 and 4, then the little-endian length of the filter properties)
    followed by the encoded LZMA1 filter properties.
    """

    def __init__(self):
        # The real compressor is created lazily on first use.
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        header = struct.pack('<BBH', 9, 4, len(props))
        return header + props

    def compress(self, data):
        """Compress *data*; the first call also emits the header."""
        prefix = b'' if self._comp is not None else self._init()
        return prefix + self._comp.compress(data)

    def flush(self):
        """Finish the stream, emitting the header first if still pending."""
        prefix = b'' if self._comp is not None else self._init()
        return prefix + self._comp.flush()
529
530
class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by a properties header.

    The stream is expected to begin with 4 header bytes, of which bytes 2-3
    hold the little-endian size of the filter properties that follow.
    """

    def __init__(self):
        self._decomp = None         # created once the header is complete
        self._unconsumed = b''      # input buffered while waiting for it
        self.eof = False

    def decompress(self, data):
        """Feed *data* and return whatever could be decompressed.

        Returns b'' while the header and properties have not been received
        together with at least one further byte.
        """
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            psize, = struct.unpack('<H', pending[2:4])
            body_start = 4 + psize
            if len(pending) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[body_start:]
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
557
558
# Descriptive names for ZIP compression method numbers; used to build the
# error message for methods that cannot be decompressed.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce', 3: 'reduce', 4: 'reduce', 5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
578
def _check_compression(compression):
    """Verify that *compression* is a method this module can write.

    Returns None on success.  Raises RuntimeError when the method is not
    one of the ZIP_* constants handled here, or when the module backing it
    (zlib, bz2 or lzma) could not be imported.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        return
    if compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
        return
    if compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
        return
    raise RuntimeError("That compression method is not supported")
596
597
def _get_compressor(compress_type):
    """Return a new compressor object for *compress_type*.

    None is returned for any type other than ZIP_DEFLATED, ZIP_BZIP2 and
    ZIP_LZMA.
    """
    if compress_type == ZIP_DEFLATED:
        # Negative window bits: raw deflate stream without zlib framing.
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    if compress_type == ZIP_LZMA:
        return LZMACompressor()
    return None
608
609
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    None is returned for ZIP_STORED.  Raises NotImplementedError for any
    type other than ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2 and ZIP_LZMA; the
    message includes the method's name when compressor_names knows it.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # Negative window bits: raw deflate stream without zlib framing.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    descr = compressor_names.get(compress_type)
    if not descr:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200625
626
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200627class _SharedFile:
628 def __init__(self, file, pos, close):
629 self._file = file
630 self._pos = pos
631 self._close = close
632
633 def read(self, n=-1):
634 self._file.seek(self._pos)
635 data = self._file.read(n)
636 self._pos = self._file.tell()
637 return data
638
639 def close(self):
640 if self._file is not None:
641 fileobj = self._file
642 self._file = None
643 self._close(fileobj)
644
645
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.

    Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # The parentheses are required: "-" binds tighter than "<<", so the
    # unparenthesized form "1 << 31 - 1" evaluates to 1 << 30.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Set up reading of one member.

        fileobj       -- object whose read() yields the raw member bytes
        mode          -- open mode; a 'U' in it enables universal newlines
        zipinfo       -- supplies compress_type, compress_size, file_size,
                         filename and (optionally) CRC
        decrypter     -- per-byte callable applied to raw data, or None
        close_fileobj -- if true, close() also closes fileobj
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Put the bytes just read back in front of whatever is still
            # buffered so that the logical position does not move.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: the object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # The request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # More was produced than requested: keep the surplus
                # buffered for the next call.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        """Fold newdata into the running CRC and verify it at end of file.

        Raises BadZipFile when the end has been reached and the running
        CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            # Loop only until the first non-empty result (or EOF).
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        """Read (and decrypt, if needed) raw bytes from the underlying file.

        At least MIN_READ_SIZE bytes are requested, capped at the number
        of compressed bytes left.  Raises EOFError when the underlying
        file returns no data although some is still expected.
        """
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = bytes(map(self._decrypter, data))
        return data

    def close(self):
        """Close this object and, if requested at creation, the source file."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000897
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000898
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000899class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000900 """ Class with methods to open, read, write, close, list zip files.
901
Serhiy Storchaka235c5e02013-11-23 15:55:38 +0200902 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +0000903
Fred Drake3d9091e2001-03-26 15:49:24 +0000904 file: Either the path to the file, or a file-like object.
905 If it is a path, the file will be opened and closed by ZipFile.
906 mode: The mode can be either read "r", write "w" or append "a".
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200907 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
908 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000909 allowZip64: if True ZipFile will create files with ZIP64 extensions when
910 needed, otherwise it will raise an exception when this would
911 be necessary.
912
Fred Drake3d9091e2001-03-26 15:49:24 +0000913 """
Fred Drake484d7352000-10-02 21:14:52 +0000914
Fred Drake90eac282001-02-28 05:29:34 +0000915 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -0800916 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +0000917
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    `file` is either a path (str), which is opened here, or an already
    open file-like object, which is used as is.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                  # Level of printing: 0 through 3
    self.NameToInfo = {}            # Find file info given name
    self.filelist = []              # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = mode
    self.pwd = None
    self._comment = b''

    # Check if we were passed a file-like object
    if not isinstance(file, str):
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    else:
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        # Maps the requested mode to the first file mode to try, and each
        # file mode to the one tried next when opening fails.
        fallback = {'r': 'rb', 'w': 'w+b', 'a': 'r+b',
                    'r+b': 'w+b', 'w+b': 'wb'}
        filemode = fallback[mode]
        while True:
            try:
                self.fp = io.open(file, filemode)
                break
            except OSError:
                if filemode not in fallback:
                    raise
                filemode = fallback[filemode]
    self._fileRefCnt = 1

    try:
        if mode == 'r':
            self._RealGetContents()
        elif mode == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
            self.start_dir = 0
        elif mode == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                self.start_dir = self.fp.tell()
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')
    except BaseException:
        # Whatever went wrong, release the file before propagating.
        fp = self.fp
        self.fp = None
        self._fpclose(fp)
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000987
def __enter__(self):
    """Enter a `with` block; the archive object itself is the bound value."""
    return self
990
def __exit__(self, type, value, traceback):
    """Leave a `with` block by closing the archive.

    Nothing is returned, so an exception raised inside the block is not
    suppressed.
    """
    self.close()
993
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Fills self.filelist and self.NameToInfo with one ZipInfo per central
    directory entry and records self.start_dir and self._comment.
    Raises BadZipFile when the end record or a directory entry is invalid.
    """
    archive = self.fp
    try:
        endrec = _EndRecData(archive)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    archive.seek(self.start_dir, 0)
    # Parse the directory from memory rather than from the archive file.
    directory = io.BytesIO(archive.read(size_cd))
    total = 0
    while total < size_cd:
        raw = directory.read(sizeCentralDir)
        if len(raw) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, raw)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        raw_name = directory.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        # Flag 0x800 selects the UTF-8 file names extension; otherwise
        # the historical ZIP filename encoding is used.
        encoding = 'utf-8' if flags & 0x800 else 'cp437'
        # Create ZipInfo instance to store file information
        x = ZipInfo(raw_name.decode(encoding))
        x.extra = directory.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = directory.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        x._decodeExtra()
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total += (sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                  + centdir[_CD_EXTRA_FIELD_LENGTH]
                  + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001070
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001071
def namelist(self):
    """Return a list of file names in the archive."""
    names = []
    for entry in self.filelist:
        names.append(entry.filename)
    return names
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001075
def infolist(self):
    """Return a list of class ZipInfo instances for files in the archive.

    The list returned is self.filelist itself, not a copy.
    """
    return self.filelist
1080
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header line is printed, then one line per archive member; `file`
    is handed to print() as its output target.
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001089
1090 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001091 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001092 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001093 for zinfo in self.filelist:
1094 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001095 # Read by chunks, to avoid an OverflowError or a
1096 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001097 with self.open(zinfo.filename, "r") as f:
1098 while f.read(chunk_size): # Check CRC-32
1099 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001100 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001101 return zinfo.filename
1102
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when the archive has no member of that name.
    """
    info = self.NameToInfo.get(name)
    if info is not None:
        return info
    raise KeyError('There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001111
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A false value (None or b'') clears the default password.  Raises
    TypeError when a non-empty pwd is not a bytes object.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001120
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Only bytes are accepted.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment))
    # An over-long comment is truncated, with a warning, rather than
    # rejected.
    limit = ZIP_MAX_COMMENT
    if len(comment) > limit:
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % limit, stacklevel=2)
        comment = comment[:limit]
    self._comment = comment
    # Mark the archive as modified.
    self._didModify = True
1138
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    The member is opened in "r" mode with the given password, read in
    full, and closed again before returning.
    """
    with self.open(name, "r", pwd) as member:
        data = member.read()
    return data
Guido van Rossumd8faa362007-04-27 19:54:29 +00001143
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    `name` is a member name or a ZipInfo object.  `mode` must be "r",
    "U" or "rU" ("U" is deprecated).  `pwd`, if given, must be bytes and
    is used instead of the archive's default password.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if 'U' in mode:
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Make sure we have an info object: either 'name' already is one,
    # or it is looked up by name.
    zinfo = name if isinstance(name, ZipInfo) else self.getinfo(name)

    self._fileRefCnt += 1
    zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose)
    try:
        # Skip the file header:
        raw_header = zef_file.read(sizeFileHeader)
        if len(raw_header) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, raw_header)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        extra_length = fheader[_FH_EXTRA_FIELD_LENGTH]
        if extra_length:
            zef_file.read(extra_length)

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        # Flag 0x800 marks a UTF-8 filename.
        name_encoding = "utf-8" if zinfo.flag_bits & 0x800 else "cp437"
        fname_str = fname.decode(name_encoding)

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        zd = None
        if zinfo.flag_bits & 0x1:
            pwd = pwd or self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, password "
                                   "required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = [zd(byte) for byte in header[0:12]]
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd, True)
    except BaseException:
        # Release the shared file on any failure, then propagate.
        zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001231
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
    using its full name. Its file information is extracted as accurately
    as possible. `member' may be a filename or a ZipInfo object. You can
    specify a different directory using `path'.

    Returns whatever _extract_member() returns for the member.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
1245
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
    directory. `path' specifies a different directory to extract to.
    `members' is optional and must be a subset of the list returned
    by namelist().
    """
    selected = self.namelist() if members is None else members
    for zipinfo in selected:
        self.extract(zipinfo, path, pwd)
1257
@classmethod
def _sanitize_windows_name(cls, arcname, pathsep):
    """Replace bad characters and remove trailing dots from parts."""
    table = cls._windows_illegal_name_trans_table
    if not table:
        # Build the translation table once and cache it on the class.
        illegal = ':<>|"?*'
        table = str.maketrans(illegal, '_' * len(illegal))
        cls._windows_illegal_name_trans_table = table
    cleaned = arcname.translate(table)
    kept = []
    for part in cleaned.split(pathsep):
        # remove trailing dots
        part = part.rstrip('.')
        # parts that end up empty are dropped when rejoining
        if part:
            kept.append(part)
    return pathsep.join(kept)
1272
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
    file on the path targetpath.

    Returns the normalized path that was written or, for a member whose
    name ends in '/', the directory that was ensured to exist.
    """
    sep = os.path.sep
    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    invalid_path_parts = ('', os.path.curdir, os.path.pardir)
    safe_parts = [part for part in arcname.split(sep)
                  if part not in invalid_path_parts]
    arcname = sep.join(safe_parts)
    if sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, sep)

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    # A member name ending in '/' denotes a directory: nothing to copy.
    if member.filename[-1] == '/':
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    with self.open(member, pwd=pwd) as source, \
            open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1311
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Warns about a duplicate member name; raises RuntimeError when the
    archive is not writable or already closed, and LargeZipFile when
    ZIP64 extensions would be needed but are not allowed.
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)
    if self._allowZip64:
        return
    # ZIP64 is not allowed: find the first limit this entry would exceed.
    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        reason = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        reason = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        reason = "Zipfile size"
    else:
        return
    raise LargeZipFile(reason + " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001334
def write(self, filename, arcname=None, compress_type=None):
    """Add the file or directory *filename* to the archive.

    The member is stored under *arcname* (default: *filename*) with the
    drive and leading separators removed.  *compress_type* overrides the
    archive's default compression for this member.  A directory is
    stored as an empty entry whose name ends with '/'.

    Raises RuntimeError if the archive is closed, or if the observed
    sizes exceed the ZIP64 limit after a non-ZIP64 header was written.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    date_time = time.localtime(st.st_mtime)[0:6]

    # Build the ZipInfo record describing the new member.
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    # New member data starts where the central directory will later go.
    self.fp.seek(self.start_dir, 0)
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        zinfo.flag_bits |= 0x02

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        zinfo.external_attr |= 0x10  # MS-DOS directory flag
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        self.start_dir = self.fp.tell()
        return

    compressor = _get_compressor(zinfo.compress_type)
    with open(filename, "rb") as src:
        # Placeholder CRC and sizes; the header is rewritten below.
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
            zinfo.file_size * 1.05 > ZIP64_LIMIT
        self.fp.write(zinfo.FileHeader(zip64))
        file_size = 0
        for chunk in iter(lambda: src.read(1024 * 8), b""):
            file_size += len(chunk)
            CRC = crc32(chunk, CRC) & 0xffffffff
            if compressor:
                chunk = compressor.compress(chunk)
                compress_size += len(chunk)
            self.fp.write(chunk)

    if compressor:
        tail = compressor.flush()
        compress_size += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size

    if not zip64 and self._allowZip64:
        # The header already on disk is not ZIP64, so it cannot
        # represent sizes above the limit.
        if file_size > ZIP64_LIMIT:
            raise RuntimeError('File size has increased during compressing')
        if compress_size > ZIP64_LIMIT:
            raise RuntimeError('Compressed size larger than uncompressed size')

    # Seek backwards and rewrite the file header, which now carries the
    # correct CRC and sizes, then return to the end of the member data.
    self.start_dir = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset, 0)
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.seek(self.start_dir, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1425
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write *data* into the archive as a single member.

    *data* may be a 'str' or a 'bytes' instance; a 'str' is encoded as
    UTF-8 first.  *zinfo_or_arcname* is either a ZipInfo instance or the
    name of the member in the archive.  When a name is given, a ZipInfo
    is created with the current local time, the archive's default
    compression, and directory attributes if the name ends with '/'.
    *compress_type*, when not None, overrides the compression of the
    member.

    Raises RuntimeError if the archive is closed and LargeZipFile if the
    member would need ZIP64 extensions that are not allowed.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        # endswith() rather than filename[-1]: indexing an empty member
        # name would fail with an unrelated IndexError.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)            # Uncompressed size
    # New member data starts where the central directory will later go.
    self.fp.seek(self.start_dir, 0)
    zinfo.header_offset = self.fp.tell()    # Start of header data
    if compress_type is not None:
        zinfo.compress_type = compress_type
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        zinfo.flag_bits |= 0x02

    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff       # CRC-32 checksum
    co = _get_compressor(zinfo.compress_type)
    if co:
        data = co.compress(data) + co.flush()
        zinfo.compress_size = len(data)    # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    zip64 = zinfo.file_size > ZIP64_LIMIT or \
        zinfo.compress_size > ZIP64_LIMIT
    if zip64 and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.write(data)
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        fmt = '<LQQ' if zip64 else '<LLL'
        self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    self.fp.flush()
    # The central directory will be written after this member.
    self.start_dir = self.fp.tell()
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1483
1484 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001485 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001486 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001487
def close(self):
    """Close the archive, writing the ending records when required.

    Returns immediately if the archive is already closed.  For mode "w"
    or "a" with pending modifications, the central directory, the ZIP64
    end-of-archive records (only when needed) and the end-of-archive
    record are written starting at ``self.start_dir``.  The underlying
    file is always released through ``_fpclose``, even on error.

    Raises LargeZipFile when ZIP64 records are needed but not allowed.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:
            # Write the ending records.
            self.fp.seek(self.start_dir, 0)

            # Central directory: one record per member.
            for zinfo in self.filelist:
                dt = zinfo.date_time
                dosdate = ((dt[0] - 1980) << 9) | (dt[1] << 5) | dt[2]
                dostime = (dt[3] << 11) | (dt[4] << 5) | (dt[5] // 2)

                # Values too large for the 32-bit fields are moved to a
                # ZIP64 extra field and replaced by 0xffffffff.
                zip64_values = []
                if (zinfo.file_size > ZIP64_LIMIT
                        or zinfo.compress_size > ZIP64_LIMIT):
                    zip64_values.append(zinfo.file_size)
                    zip64_values.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    zip64_values.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                min_version = 0
                if zip64_values:
                    # Prepend a ZIP64 field to the member's extra data.
                    count = len(zip64_values)
                    extra_data = struct.pack(
                        '<HH' + 'Q' * count,
                        1, 8 * count, *zip64_values) + extra_data
                    min_version = ZIP64_VERSION

                if zinfo.compress_type == ZIP_BZIP2:
                    min_version = max(BZIP2_VERSION, min_version)
                elif zinfo.compress_type == ZIP_LZMA:
                    min_version = max(LZMA_VERSION, min_version)

                extract_version = max(min_version, zinfo.extract_version)
                create_version = max(min_version, zinfo.create_version)
                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(
                        structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the values being packed, then re-raise.
                    print((structCentralDir, stringCentralDir, create_version,
                           zinfo.create_system, extract_version, zinfo.reserved,
                           zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                           zinfo.CRC, compress_size, file_size,
                           len(zinfo.filename), len(extra_data), len(zinfo.comment),
                           0, zinfo.internal_attr, zinfo.external_attr,
                           header_offset), file=sys.stderr)
                    raise
                for piece in (centdir, filename, extra_data, zinfo.comment):
                    self.fp.write(piece)

            dir_end = self.fp.tell()
            # End-of-archive record(s).
            dir_count = len(self.filelist)
            dir_size = dir_end - self.start_dir
            dir_offset = self.start_dir
            if dir_count > ZIP_FILECOUNT_LIMIT:
                zip64_reason = "Files count"
            elif dir_offset > ZIP64_LIMIT:
                zip64_reason = "Central directory offset"
            elif dir_size > ZIP64_LIMIT:
                zip64_reason = "Central directory size"
            else:
                zip64_reason = None

            if zip64_reason:
                # The ZIP64 end-of-archive records are needed.
                if not self._allowZip64:
                    raise LargeZipFile(zip64_reason +
                                       " would require ZIP64 extensions")
                self.fp.write(struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, dir_count, dir_count,
                    dir_size, dir_offset))
                self.fp.write(struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, dir_end, 1))
                # Clamp the values stored in the classic record.
                dir_count = min(dir_count, 0xFFFF)
                dir_size = min(dir_size, 0xFFFFFFFF)
                dir_offset = min(dir_offset, 0xFFFFFFFF)

            self.fp.write(struct.pack(
                structEndArchive, stringEndArchive,
                0, 0, dir_count, dir_count,
                dir_size, dir_offset, len(self._comment)))
            self.fp.write(self._comment)
            self.fp.flush()
    finally:
        # Mark the archive closed before releasing the file.
        handle = self.fp
        self.fp = None
        self._fpclose(handle)
1600
1601 def _fpclose(self, fp):
1602 assert self._fileRefCnt > 0
1603 self._fileRefCnt -= 1
1604 if not self._fileRefCnt and not self._filePassed:
1605 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001606
1607
class PyZipFile(ZipFile):
    """ZipFile subclass for archiving Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive and remember the requested optimization level.

        *optimize* of -1 selects the legacy behaviour of using whatever
        compiled file is already present.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add the Python code found at "pathname" to the ZIP archive.

        If pathname is a package directory, the package and all of its
        sub-packages are searched recursively for *.py files.  If it is
        a plain directory, only the *.py files directly inside it are
        added, at the top level.  Otherwise pathname must be a *.py
        file.  Modules are added in compiled form (module.pyc or
        module.pyo), being compiled first if necessary.

        If filterfunc is given, it is called with each path; when it
        returns a false value, that file or directory is skipped.
        """
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s "%s" skipped by filterfunc' % (label, pathname))
            return

        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # Not a package directory: add its modules at top level.
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if ext != ".py":
                    continue
                if filterfunc and not filterfunc(path):
                    if self.debug:
                        print('file "%s" skipped by filterfunc' % path)
                    continue
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
            return

        # A package directory: add __init__ first, then the contents.
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)

        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        for filename in entries:
            path = os.path.join(pathname, filename)
            ext = os.path.splitext(filename)[1]
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # Sub-package: recurse into it.
                    self.writepy(path, basename, filterfunc=filterfunc)
                continue
            if ext != ".py":
                continue
            if filterfunc and not filterfunc(path):
                if self.debug:
                    print('file "%s" skipped by filterfunc' % path)
                continue
            fname, arcname = self._get_codename(path[0:-3], basename)
            if self.debug:
                print("Adding", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the module path.

        pathname is a module path without its ".py" suffix.  The result
        names the file to read from disk and the name to store in the
        archive, compiling the source if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).
        If compilation fails, the ".py" source itself is returned.
        """
        def compile_source(source, level=-1):
            # Compile *source*; report failure instead of raising.
            import py_compile
            if self.debug:
                print("Compiling", source)
            try:
                py_compile.compile(source, doraise=True, optimize=level)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            else:
                return True

        def is_fresh(candidate):
            # True if *candidate* exists and is not older than the source.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = importlib.util.cache_from_source(file_py, True)
        pycache_pyo = importlib.util.cache_from_source(file_py, False)

        if self._optimize == -1:
            # Legacy mode: use whatever compiled file is present.  Files
            # from __pycache__ are stored under the legacy name.
            candidates = (
                (file_pyo, file_pyo),
                (file_pyc, file_pyc),
                (pycache_pyc, file_pyc),
                (pycache_pyo, file_pyo),
            )
            for fname, arcname in candidates:
                if is_fresh(fname):
                    break
            else:
                # Nothing usable: compile py into a PEP 3147 pyc file.
                if compile_source(file_py):
                    if __debug__:
                        fname, arcname = pycache_pyc, file_pyc
                    else:
                        fname, arcname = pycache_pyo, file_pyo
                else:
                    fname = arcname = file_py
        else:
            # New mode: use the given optimization level.
            if self._optimize == 0:
                fname, arcname = pycache_pyc, file_pyc
            else:
                fname, arcname = pycache_pyo, file_pyo
            if not is_fresh(fname):
                if not compile_source(file_py, self._optimize):
                    fname = arcname = file_py

        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001766
1767
def main(args=None):
    """Run the command-line interface.

    *args* defaults to ``sys.argv[1:]``.  The first argument selects the
    action: -l list, -t test, -e extract, -c create.  With a missing or
    unknown action, or the wrong number of arguments, the usage text is
    printed and the process exits with status 1.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)

    def usage_exit():
        # Shared failure path for every argument error.
        print(USAGE)
        sys.exit(1)

    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_exit()

    command = args[0]

    if command == '-l':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif command == '-t':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif command == '-e':
        if len(args) != 3:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif command == '-c':
        if len(args) < 3:
            usage_exit()

        def add_to_zip(zf, path, zippath):
            # Files are deflated; directories are added recursively.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                if zippath:
                    zf.write(path, zippath)
                for nm in os.listdir(path):
                    add_to_zip(zf,
                               os.path.join(path, nm),
                               os.path.join(zippath, nm))
            # anything else is ignored

        with ZipFile(args[1], 'w') as zf:
            for path in args[2:]:
                # Fall back to the parent's name for paths that end in
                # a separator.
                zippath = (os.path.basename(path) or
                           os.path.basename(os.path.dirname(path)))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                add_to_zip(zf, path, zippath)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001833
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()