blob: 11d7cf9cb6f11a58234fa8d2ff9b698cf2e97596 [file] [log] [blame]
"""
Read and write ZIP files.

XXX references to utf-8 need further investigation.
"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Brett Cannonb57a0852013-06-15 17:32:30 -04009import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +000010import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
# The compression back-ends are optional: each module name is bound to None
# when the import fails so that later code can test for availability.
try:
    import zlib  # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    # Fall back to binascii's CRC-32 when zlib is not available.
    crc32 = binascii.crc32

try:
    import bz2  # We may need its compression method
except ImportError:
    bz2 = None

try:
    import lzma  # We may need its compression method
except ImportError:
    lzma = None
34
# Public names exported by "from zipfile import *".
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000038
class BadZipFile(Exception):
    """Raised for archives this module cannot handle.

    Within this file it is raised, for example, for archives that span
    multiple disks.
    """


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names
50
Guido van Rossum32abe6f2000-03-31 17:30:02 +000051
# Size/count thresholds above which the ZIP64 extensions are needed, and the
# largest archive comment that fits in the 16-bit comment-length field.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "Version needed to extract" values written for each feature.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020069
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
162
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    *fp* is handed to _EndRecData(); an OSError raised while probing it is
    swallowed and reported as False.
    """
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except OSError:
        pass
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000170
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too (anything
    with a "read" attribute is used directly instead of being opened).
    Returns False, rather than raising, when the file cannot be opened or
    read (OSError).
    """
    result = False
    try:
        if hasattr(filename, "read"):
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except OSError:
        pass
    return result
186
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is a seekable binary file object.  offset is the position of the
    classic "end of central directory" record expressed relative to the end
    of the file (it is passed to seek() with whence=2).  endrec is the list
    built by _EndRecData(); it is updated in place and returned.  When no
    ZIP64 locator/record is found, endrec is returned unchanged.

    Raises BadZipFile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    (sig, sz, create_version, read_version, disk_num, disk_dir,
     dircount, dircount2, dirsize, diroffset) = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
228
229
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    fpin must be a seekable binary file object.  None is returned when the
    file is too short or no (complete) end record can be located.  The list
    is passed through _EndRecData64() so ZIP64 values, when present,
    override the classic ones.
    """

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000289
Fred Drake484d7352000-10-02 21:14:52 +0000290
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry description with default ("standard") values.

        filename is the member name; date_time is a sequence of
        (year, month, day, hour, min, sec).  Raises ValueError when the
        year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        zip64 selects whether a ZIP64 extra record is appended: True forces
        it, False forbids it, None (default) enables it only when one of the
        sizes exceeds ZIP64_LIMIT.  Raises LargeZipFile when a size exceeds
        ZIP64_LIMIT but zip64 is false.

        As a side effect, extract_version and create_version are raised to
        the minimum version required by the features in use.
        """
        dt = self.date_time
        # Pack the timestamp into the 16-bit MS-DOS date and time fields
        # (seconds are stored with two-second resolution).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4,
                                        file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        ASCII names are returned with the flags untouched; any other name is
        encoded as UTF-8 and flag bit 0x800 is set.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Decode the extra field, applying ZIP64 (type 1) size overrides.

        For each ZIP64 record, the 64-bit values replace file_size,
        compress_size and header_offset (in that order) for every attribute
        that currently holds the 32-bit "see ZIP64 record" marker.

        Raises RuntimeError when a ZIP64 record has an unexpected length, is
        truncated, or holds fewer values than the markers require.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    fmt = '<QQQ'
                elif ln == 16:
                    fmt = '<QQ'
                elif ln == 8:
                    fmt = '<Q'
                elif ln == 0:
                    fmt = '<'
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                # Guard against a record whose declared length runs past the
                # end of the extra data (would otherwise be a struct.error).
                needed = struct.calcsize(fmt)
                if len(extra) < 4 + needed:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))
                counts = unpack(fmt, extra[4:4 + needed])

                idx = 0

                try:
                    # ZIP64 extension (large files and/or large archives)
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        self.file_size = counts[idx]
                        idx += 1

                    if self.compress_size == 0xFFFFFFFF:
                        self.compress_size = counts[idx]
                        idx += 1

                    if self.header_offset == 0xffffffff:
                        self.header_offset = counts[idx]
                        idx += 1
                except IndexError:
                    # The record carries fewer 64-bit values than there are
                    # marker fields to replace.
                    raise RuntimeError(
                        "Corrupt extra field %s"%(ln,)) from None

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000445
446
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    @staticmethod
    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = [0] * 256
        for i in range(256):
            crc = i
            for j in range(8):
                if crc & 1:
                    crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
                else:
                    crc = ((crc >> 1) & 0x7FFFFFFF)
            table[i] = crc
        return table
    # Built lazily by the first instance and then shared by the class.
    crctable = None

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]

    def __init__(self, pwd):
        """Initialise the three keys and mix in *pwd*.

        pwd is iterated item by item and each item is used as an integer
        byte value (e.g. a bytes object).
        """
        if _ZipDecrypter.crctable is None:
            _ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable()
        self.key0 = 305419896
        self.key1 = 591751049
        self.key2 = 878082192
        for p in pwd:
            self._UpdateKeys(p)

    def _UpdateKeys(self, c):
        """Advance the three keys with the plaintext byte value *c*."""
        self.key0 = self._crc32(c, self.key0)
        self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
        self.key1 = (self.key1 * 134775813 + 1) & 4294967295
        self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        c = c ^ (((k * (k^1)) >> 8) & 255)
        # The keys are updated with the decrypted (plaintext) byte.
        self._UpdateKeys(c)
        return c
506
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200507
class LZMACompressor:
    """Raw LZMA1 compressor that prefixes its output with a small header.

    The underlying lzma compressor is created lazily; the first bytes
    returned (by compress() or flush()) start with the header built by
    _init().
    """

    def __init__(self):
        self._comp = None

    def _init(self):
        """Create the raw LZMA1 compressor and return the stream header.

        The header is the two bytes 9 and 4, the little-endian 16-bit
        length of the encoded filter properties, then the properties.
        """
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
            lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        ])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*; the first call's result starts with the header."""
        if self._comp is None:
            # _init() must run first: it creates self._comp.
            return self._init() + self._comp.compress(data)
        return self._comp.compress(data)

    def flush(self):
        """Finish the stream, emitting the header first if nothing was written."""
        if self._comp is None:
            return self._init() + self._comp.flush()
        return self._comp.flush()
529
530
class LZMADecompressor:
    """Decompressor for the header-prefixed raw LZMA1 stream.

    Input is buffered until the 4-byte header, the filter properties and at
    least one further byte are available; only then is the underlying raw
    lzma decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Feed *data* and return whatever decompressed bytes are available.

        Returns b'' while the header is still incomplete.  self.eof mirrors
        the eof flag of the underlying decompressor.
        """
        if self._decomp is None:
            self._unconsumed += data
            if len(self._unconsumed) <= 4:
                return b''
            # Bytes 2-3 of the header hold the properties length.
            psize, = struct.unpack('<H', self._unconsumed[2:4])
            if len(self._unconsumed) <= 4 + psize:
                return b''

            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
                lzma._decode_filter_properties(lzma.FILTER_LZMA1,
                                               self._unconsumed[4:4 + psize])
            ])
            data = self._unconsumed[4 + psize:]
            # The header buffer is no longer needed once decoding starts.
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
557
558
# Human-readable names for ZIP compression method ids; used to build the
# error message for methods this module cannot decompress.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
578
def _check_compression(compression):
    """Validate that *compression* is a method this module can write.

    Raises RuntimeError when the method is unknown, or when the module it
    needs (zlib, bz2 or lzma) could not be imported.  Returns None.
    """
    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
    else:
        raise RuntimeError("That compression method is not supported")
596
597
def _get_compressor(compress_type):
    """Return a new compressor object for *compress_type*, or None.

    None is returned for every method other than deflate, bzip2 and LZMA
    (which includes ZIP_STORED).
    """
    if compress_type == ZIP_DEFLATED:
        # wbits=-15 selects a raw deflate stream (no zlib header/trailer).
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    elif compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    elif compress_type == ZIP_LZMA:
        return LZMACompressor()
    else:
        return None
608
609
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    Returns None for ZIP_STORED.  Raises NotImplementedError for any other
    unsupported method, naming it when it appears in compressor_names.
    """
    if compress_type == ZIP_STORED:
        return None
    elif compress_type == ZIP_DEFLATED:
        # wbits=-15 selects a raw deflate stream (no zlib header/trailer).
        return zlib.decompressobj(-15)
    elif compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    elif compress_type == ZIP_LZMA:
        return LZMADecompressor()
    else:
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
        else:
            raise NotImplementedError("compression type %d" % (compress_type,))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200625
626
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000627class ZipExtFile(io.BufferedIOBase):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000628 """File-like object for reading an archive member.
629 Is returned by ZipFile.open().
630 """
631
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000632 # Max size supported by decompressor.
633 MAX_N = 1 << 31 - 1
Guido van Rossumd8faa362007-04-27 19:54:29 +0000634
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000635 # Read from compressed files in 4k blocks.
636 MIN_READ_SIZE = 4096
Guido van Rossumd8faa362007-04-27 19:54:29 +0000637
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000638 # Search for universal newlines or line chunks.
639 PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
640
def __init__(self, fileobj, mode, zipinfo, decrypter=None,
             close_fileobj=False):
    """Prepare to read one archive member from fileobj.

    zipinfo supplies the member's name, compression type, compressed and
    uncompressed sizes and (when present) its CRC.  decrypter, if given,
    is applied to every byte read.  When close_fileobj is true, close()
    also closes fileobj.  A 'U' in mode enables universal newlines.
    """
    # Underlying stream and how it must be treated.
    self._fileobj = fileobj
    self._decrypter = decrypter
    self._close_fileobj = close_fileobj

    # Sizes and compression method from the directory entry.
    self._compress_type = zipinfo.compress_type
    self._compress_left = zipinfo.compress_size
    self._left = zipinfo.file_size
    self._decompressor = _get_decompressor(self._compress_type)

    # Read-side buffering state.
    self._eof = False
    self._readbuffer = b''
    self._offset = 0

    # Universal-newline bookkeeping.
    self._universal = 'U' in mode
    self.newlines = None

    # The first 12 bytes of an encrypted member carry the
    # encryption/password information, not payload.
    if decrypter is not None:
        self._compress_left -= 12

    self.mode = mode
    self.name = zipinfo.filename

    # Only track a running CRC when there is a reference value to check.
    if not hasattr(zipinfo, 'CRC'):
        self._expected_crc = None
    else:
        self._expected_crc = zipinfo.CRC
        self._running_crc = crc32(b'') & 0xffffffff
673
def readline(self, limit=-1):
    """Read and return a line from the stream.

    If limit is specified, at most limit bytes will be read.

    In universal-newline mode any of b'\\n', b'\\r' or b'\\r\\n' ends the
    line, the returned line always ends with b'\\n', and each distinct
    terminator seen is recorded in self.newlines.
    """

    if not self._universal and limit < 0:
        # Shortcut common case - newline found in buffer.
        i = self._readbuffer.find(b'\n', self._offset) + 1
        if i > 0:
            line = self._readbuffer[self._offset: i]
            self._offset = i
            return line

    if not self._universal:
        # No universal newlines: defer to the generic implementation.
        return io.BufferedIOBase.readline(self, limit)

    line = b''
    while limit < 0 or len(line) < limit:
        readahead = self.peek(2)
        if readahead == b'':
            # Nothing left to read: return what was collected so far.
            return line

        #
        # Search for universal newlines or line chunks.
        #
        # The pattern returns either a line chunk or a newline, but not
        # both. Combined with peek(2), we are assured that the sequence
        # '\r\n' is always retrieved completely and never split into
        # separate newlines - '\r', '\n' due to coincidental readaheads.
        #
        match = self.PATTERN.search(readahead)
        newline = match.group('newline')
        if newline is not None:
            if self.newlines is None:
                self.newlines = []
            if newline not in self.newlines:
                self.newlines.append(newline)
            # Consume the terminator and normalize it to b'\n'.
            self._offset += len(newline)
            return line + b'\n'

        chunk = match.group('chunk')
        if limit >= 0:
            # Never return more than limit bytes in total.
            chunk = chunk[: limit - len(line)]

        self._offset += len(chunk)
        line += chunk

    return line
723
def peek(self, n=1):
    """Return buffered bytes without advancing the position.

    At least n bytes are made available when the stream still has them;
    at most 512 bytes are returned.
    """
    buffered = len(self._readbuffer) - self._offset
    if buffered < n:
        fetched = self.read(n)
        # read() consumed the bytes; put them back in front of the reader.
        if len(fetched) <= self._offset:
            self._offset -= len(fetched)
        else:
            self._readbuffer = fetched + self._readbuffer[self._offset:]
            self._offset = 0

    # Return up to 512 bytes to reduce allocation overhead for tight loops.
    start = self._offset
    return self._readbuffer[start: start + 512]
736
def readable(self):
    """Report that this stream can be read from (always True)."""
    return True
739
def read(self, n=-1):
    """Read and return up to n bytes.

    If the argument is omitted, None, or negative, data is read and
    returned until EOF is reached.
    """
    read_all = n is None or n < 0
    if not read_all:
        stop = n + self._offset
        if stop < len(self._readbuffer):
            # The request is fully satisfied by already-buffered data.
            result = self._readbuffer[self._offset:stop]
            self._offset = stop
            return result
        # Number of bytes still needed once the buffer is drained.
        missing = stop - len(self._readbuffer)

    # Hand out whatever is buffered and reset the buffer.
    result = self._readbuffer[self._offset:]
    self._readbuffer = b''
    self._offset = 0

    if read_all:
        while not self._eof:
            result += self._read1(self.MAX_N)
        return result

    while missing > 0 and not self._eof:
        piece = self._read1(missing)
        if len(piece) > missing:
            # Got more than asked for: keep the surplus buffered.
            self._readbuffer = piece
            self._offset = missing
            result += piece[:missing]
            break
        result += piece
        missing -= len(piece)
    return result
772
773 def _update_crc(self, newdata):
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000774 # Update the CRC using the given data.
775 if self._expected_crc is None:
776 # No need to compute the CRC if we don't have a reference value
777 return
778 self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
779 # Check the CRC if we're at the end of the file
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200780 if self._eof and self._running_crc != self._expected_crc:
Georg Brandl4d540882010-10-28 06:42:33 +0000781 raise BadZipFile("Bad CRC-32 for file %r" % self.name)
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000782
def read1(self, n=-1):
    """Read up to n bytes with at most one read() system call.

    If n is omitted, None or negative, return everything currently
    buffered plus the result of the first underlying read that yields
    data.  n defaults to -1 so the method can be called without an
    argument, like read(); the body already handled None and negative
    values.
    """

    if n is None or n < 0:
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        # Keep going only while the underlying read produced nothing
        # (e.g. the decompressor needed more input); stop at first data.
        while not self._eof:
            data = self._read1(self.MAX_N)
            if data:
                buf += data
                break
        return buf

    end = n + self._offset
    if end < len(self._readbuffer):
        # Fully satisfied from the buffer, no underlying read needed.
        buf = self._readbuffer[self._offset:end]
        self._offset = end
        return buf

    # Bytes still wanted after draining the buffer.
    n = end - len(self._readbuffer)
    buf = self._readbuffer[self._offset:]
    self._readbuffer = b''
    self._offset = 0
    if n > 0:
        while not self._eof:
            data = self._read1(n)
            if n < len(data):
                # More than requested came back: buffer the surplus.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            if data:
                buf += data
                break
    return buf
819
def _read1(self, n):
    # Read up to n compressed bytes with at most one read() system call,
    # decrypt and decompress them.
    #
    # Returns b'' when EOF was already reached or n <= 0.  Updates
    # self._eof and self._left, and feeds the returned data to
    # _update_crc() before returning it.
    if self._eof or n <= 0:
        return b''

    # Read from file.
    if self._compress_type == ZIP_DEFLATED:
        ## Handle unconsumed data.
        # Input the decompressor did not consume last time is fed first;
        # the file is only read for the remainder of the request.
        data = self._decompressor.unconsumed_tail
        if n > len(data):
            data += self._read2(n - len(data))
    else:
        data = self._read2(n)

    if self._compress_type == ZIP_STORED:
        # Stored data is returned as read; EOF once all input is consumed.
        self._eof = self._compress_left <= 0
    elif self._compress_type == ZIP_DEFLATED:
        # Cap the decompressed output, but never below MIN_READ_SIZE.
        n = max(n, self.MIN_READ_SIZE)
        data = self._decompressor.decompress(data, n)
        # EOF when the decompressor says so, or when there is neither
        # file input nor unconsumed input left.
        self._eof = (self._decompressor.eof or
                     self._compress_left <= 0 and
                     not self._decompressor.unconsumed_tail)
        if self._eof:
            data += self._decompressor.flush()
    else:
        # Other compression types: decompress without an output cap.
        data = self._decompressor.decompress(data)
        self._eof = self._decompressor.eof or self._compress_left <= 0

    # Never return more than the declared uncompressed size.
    data = data[:self._left]
    self._left -= len(data)
    if self._left <= 0:
        self._eof = True
    self._update_crc(data)
    return data
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000855
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200856 def _read2(self, n):
857 if self._compress_left <= 0:
858 return b''
859
860 n = max(n, self.MIN_READ_SIZE)
861 n = min(n, self._compress_left)
862
863 data = self._fileobj.read(n)
864 self._compress_left -= len(data)
Serhiy Storchaka5ce3f102014-01-09 14:50:20 +0200865 if not data:
866 raise EOFError
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200867
868 if self._decrypter is not None:
869 data = bytes(map(self._decrypter, data))
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000870 return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000871
def close(self):
    """Close the stream, and the underlying file object if requested.

    The underlying file object is closed only when close_fileobj was true
    at construction; the base-class close() runs in either case.
    """
    owns_fileobj = self._close_fileobj
    try:
        if owns_fileobj:
            self._fileobj.close()
    finally:
        # Run the base-class close even if closing the file object failed.
        super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000878
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000879
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000880class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000881 """ Class with methods to open, read, write, close, list zip files.
882
Serhiy Storchaka235c5e02013-11-23 15:55:38 +0200883 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +0000884
Fred Drake3d9091e2001-03-26 15:49:24 +0000885 file: Either the path to the file, or a file-like object.
886 If it is a path, the file will be opened and closed by ZipFile.
887 mode: The mode can be either read "r", write "w" or append "a".
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200888 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
889 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000890 allowZip64: if True ZipFile will create files with ZIP64 extensions when
891 needed, otherwise it will raise an exception when this would
892 be necessary.
893
Fred Drake3d9091e2001-03-26 15:49:24 +0000894 """
Fred Drake484d7352000-10-02 21:14:52 +0000895
Fred Drake90eac282001-02-28 05:29:34 +0000896 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -0800897 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +0000898
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file is either a path (str), which is opened here, or a file-like
    object, which is used as given.  Raises RuntimeError for any other
    mode.  If setting up the archive fails, a file opened here is closed
    again before the exception propagates.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self._comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = io.open(file, modeDict[mode])
        except OSError:
            if mode == 'a':
                # The file could not be opened for update: fall back to
                # creating it, and treat the archive as newly written.
                mode = key = 'w'
                self.fp = io.open(file, modeDict[mode])
            else:
                raise
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    try:
        if key == 'r':
            self._RealGetContents()
        elif key == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        elif key == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                # NOTE(review): the captured source lost its indentation;
                # this assignment is assumed to belong to the except
                # branch - confirm against the canonical file.
                self._didModify = True
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')
    except:
        # Do not leak a file we opened ourselves; a caller-supplied file
        # object is left open.  The exception is always re-raised.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000963
Ezio Melottifaa6b7f2009-12-30 12:34:59 +0000964 def __enter__(self):
965 return self
966
967 def __exit__(self, type, value, traceback):
968 self.close()
969
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record, then parses every
    central directory entry into a ZipInfo that is appended to
    self.filelist and indexed by name in self.NameToInfo.  Also sets
    self._comment and self.start_dir.

    Raises BadZipFile when the end record cannot be found or the central
    directory is truncated or has a bad signature, and
    NotImplementedError when an entry needs a newer extract version than
    MAX_EXTRACT_VERSION.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # From here on, parse the directory from an in-memory copy.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if len(centdir) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Index 5 is unpacked into x.flag_bits below (centdir[1:12]).
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                        t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        # Shift the header offset by the same amount as the directory.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001046
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001047
def namelist(self):
    """Return a new list with the name of every archive member, in
    directory order."""
    return [zinfo.filename for zinfo in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001051
def infolist(self):
    """Return the list of ZipInfo instances for the archive members.

    This is the archive's own list object, not a copy.
    """
    return self.filelist
1056
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header line is followed by one line per member giving its name,
    modification time and uncompressed size.  Output goes to `file`,
    which is passed straight to print().
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001065
1066 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001067 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001068 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001069 for zinfo in self.filelist:
1070 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001071 # Read by chunks, to avoid an OverflowError or a
1072 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001073 with self.open(zinfo.filename, "r") as f:
1074 while f.read(chunk_size): # Check CRC-32
1075 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001076 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001077 return zinfo.filename
1078
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError if the archive has no member with that name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found

    message = 'There is no item named %r in the archive' % name
    raise KeyError(message)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001087
def setpassword(self, pwd):
    """Set the default password used for encrypted members.

    pwd must be bytes; any false value (None, b'') clears the default.
    Raises TypeError for a true value that is not bytes.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001096
@property
def comment(self):
    """The archive-level comment, as bytes."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Only bytes are accepted.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment))
    # Over-long comments are truncated (with a warning), not rejected.
    limit = ZIP_MAX_COMMENT
    if len(comment) > limit:
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % limit, stacklevel=2)
        comment = comment[:limit]
    self._comment = comment
    # Mark the archive as modified so the new comment gets written out.
    self._didModify = True
1114
def read(self, name, pwd=None):
    """Return the complete contents of member `name` as bytes.

    `name` and `pwd` are passed on to open(); the member is closed again
    before the data is returned.
    """
    with self.open(name, "r", pwd) as member:
        data = member.read()
    return data
Guido van Rossumd8faa362007-04-27 19:54:29 +00001119
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name is a member name or a ZipInfo instance; mode must be "r", "U"
    or "rU" ("U" is deprecated); pwd, if given, must be bytes and
    overrides the archive's default password for this member.

    Raises RuntimeError for a bad mode, a closed archive, a missing
    password or a wrong password; TypeError for a non-bytes password;
    BadZipFile for a damaged or inconsistent local file header; and
    NotImplementedError for patched data or strong encryption.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if 'U' in mode:
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, fheader)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            # The extra field is read only to move past it.
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        # The local header must name the same file as the directory entry.
        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            # An explicit pwd wins; otherwise use the archive default.
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, password "
                                   "required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = list(map(zd, header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the high byte of the raw file time
                # when flag bit 3 is set
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        # The returned object closes zef_file only if it was opened here.
        return ZipExtFile(zef_file, mode, zinfo, zd,
                          close_fileobj=not self._filePassed)
    except:
        # Close a file opened here on any failure, then re-raise.
        if not self._filePassed:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001215
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the path it was written to.

    `member` may be a member name or a ZipInfo object.  The member is
    extracted under `path`, or under the current working directory when
    `path` is None, using its full name.  `pwd` is the password to use
    for an encrypted member.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
1229
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() on each in turn.

    `members` defaults to everything returned by namelist(); `path` and
    `pwd` are passed unchanged to extract() for every member.
    """
    selected = self.namelist() if members is None else members
    for member in selected:
        self.extract(member, path, pwd)
1241
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001242 @classmethod
1243 def _sanitize_windows_name(cls, arcname, pathsep):
1244 """Replace bad characters and remove trailing dots from parts."""
1245 table = cls._windows_illegal_name_trans_table
1246 if not table:
1247 illegal = ':<>|"?*'
1248 table = str.maketrans(illegal, '_' * len(illegal))
1249 cls._windows_illegal_name_trans_table = table
1250 arcname = arcname.translate(table)
1251 # remove trailing dots
1252 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1253 # rejoin, removing empty parts.
1254 arcname = pathsep.join(x for x in arcname if x)
1255 return arcname
1256
Christian Heimes790c8232008-01-07 21:14:23 +00001257 def _extract_member(self, member, targetpath, pwd):
1258 """Extract the ZipInfo object 'member' to a physical
1259 file on the path targetpath.
1260 """
1261 # build the destination pathname, replacing
1262 # forward slashes to platform specific separators.
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001263 arcname = member.filename.replace('/', os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001264
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001265 if os.path.altsep:
1266 arcname = arcname.replace(os.path.altsep, os.path.sep)
1267 # interpret absolute pathname as relative, remove drive letter or
1268 # UNC path, redundant separators, "." and ".." components.
1269 arcname = os.path.splitdrive(arcname)[1]
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001270 invalid_path_parts = ('', os.path.curdir, os.path.pardir)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001271 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001272 if x not in invalid_path_parts)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001273 if os.path.sep == '\\':
Serhiy Storchakae5e64442013-02-02 19:50:59 +02001274 # filter illegal characters on Windows
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001275 arcname = self._sanitize_windows_name(arcname, os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001276
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001277 targetpath = os.path.join(targetpath, arcname)
Christian Heimes790c8232008-01-07 21:14:23 +00001278 targetpath = os.path.normpath(targetpath)
1279
1280 # Create all upper directories if necessary.
1281 upperdirs = os.path.dirname(targetpath)
1282 if upperdirs and not os.path.exists(upperdirs):
1283 os.makedirs(upperdirs)
1284
Martin v. Löwis59e47792009-01-24 14:10:07 +00001285 if member.filename[-1] == '/':
Martin v. Löwis70ccd162009-05-24 19:47:22 +00001286 if not os.path.isdir(targetpath):
1287 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001288 return targetpath
1289
Antoine Pitrou17babc52012-11-17 23:50:08 +01001290 with self.open(member, pwd=pwd) as source, \
1291 open(targetpath, "wb") as target:
1292 shutil.copyfileobj(source, target)
Christian Heimes790c8232008-01-07 21:14:23 +00001293
1294 return targetpath
1295
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001296 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001297 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001298 if zinfo.filename in self.NameToInfo:
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001299 import warnings
1300 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001301 if self.mode not in ("w", "a"):
Collin Winterce36ad82007-08-30 01:19:48 +00001302 raise RuntimeError('write() requires mode "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001303 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001304 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02001305 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001306 _check_compression(zinfo.compress_type)
Serhiy Storchakacfbb3942014-09-23 21:34:24 +03001307 if not self._allowZip64:
1308 requires_zip64 = None
1309 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1310 requires_zip64 = "Files count"
1311 elif zinfo.file_size > ZIP64_LIMIT:
1312 requires_zip64 = "Filesize"
1313 elif zinfo.header_offset > ZIP64_LIMIT:
1314 requires_zip64 = "Zipfile size"
1315 if requires_zip64:
1316 raise LargeZipFile(requires_zip64 +
1317 " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001318
def write(self, filename, arcname=None, compress_type=None):
    """Add the file or directory `filename` to the archive.

    The member is stored under `arcname` (default: `filename`) after the
    drive letter and leading path separators have been stripped.
    Directories get a trailing '/' and are always stored uncompressed;
    regular files use `compress_type`, falling back to the archive's
    default compression.

    Raises RuntimeError if the archive is already closed, or if the
    file turns out to need ZIP64 sizes after its header was written
    without them.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    is_directory = stat.S_ISDIR(st.st_mode)
    timestamp = time.localtime(st.st_mtime)[0:6]

    # Work out the name used inside the archive.
    member_name = filename if arcname is None else arcname
    member_name = os.path.normpath(os.path.splitdrive(member_name)[1])
    while member_name[0] in (os.sep, os.altsep):
        member_name = member_name[1:]
    if is_directory:
        member_name += '/'

    # Create ZipInfo instance to store file information
    zinfo = ZipInfo(member_name, timestamp)
    zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
    if is_directory:
        zinfo.compress_type = ZIP_STORED
    elif compress_type is not None:
        zinfo.compress_type = compress_type
    else:
        zinfo.compress_type = self.compression

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        zinfo.flag_bits |= 0x02

    self._writecheck(zinfo)
    self._didModify = True

    if is_directory:
        # A directory entry is just a header with no data.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        zinfo.external_attr |= 0x10  # MS-DOS directory flag
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        return

    compressor = _get_compressor(zinfo.compress_type)
    with open(filename, "rb") as source:
        # The header written now carries placeholder CRC and sizes; it is
        # rewritten with the real values once the data has been copied.
        zinfo.CRC = running_crc = 0
        zinfo.compress_size = written = 0
        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
            zinfo.file_size * 1.05 > ZIP64_LIMIT
        self.fp.write(zinfo.FileHeader(zip64))
        consumed = 0
        while True:
            chunk = source.read(1024 * 8)
            if not chunk:
                break
            consumed += len(chunk)
            running_crc = crc32(chunk, running_crc) & 0xffffffff
            if compressor:
                chunk = compressor.compress(chunk)
                written += len(chunk)
            self.fp.write(chunk)

    if compressor:
        tail = compressor.flush()
        written += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = written
    else:
        zinfo.compress_size = consumed
    zinfo.CRC = running_crc
    zinfo.file_size = consumed

    if not zip64 and self._allowZip64:
        # The header was sized for 32-bit fields; the data must still fit.
        if consumed > ZIP64_LIMIT:
            raise RuntimeError('File size has increased during compressing')
        if written > ZIP64_LIMIT:
            raise RuntimeError('Compressed size larger than uncompressed size')

    # Seek backwards and write file header (which will now include
    # correct CRC and file sizes), then return to the end of the data.
    end_of_data = self.fp.tell()
    self.fp.seek(zinfo.header_offset, 0)
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.seek(end_of_data, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1409
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write `data` into the archive as a single member.

    `data` may be 'bytes' or 'str'; a 'str' is encoded as UTF-8 first.
    `zinfo_or_arcname` is either a ZipInfo instance, which is used (and
    updated) as is, or the member name, in which case a ZipInfo is
    created with the current local time and the archive's default
    compression.  A non-None `compress_type` overrides the compression
    recorded in the ZipInfo.

    Raises RuntimeError if the archive is already closed and
    LargeZipFile if the member needs ZIP64 but ZIP64 is not allowed.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")

    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        now = time.localtime(time.time())[:6]
        zinfo = ZipInfo(filename=zinfo_or_arcname, date_time=now)
        zinfo.compress_type = self.compression
        if zinfo.filename[-1] == '/':
            # drwxrwxr-x plus the MS-DOS directory flag
            zinfo.external_attr = (0o40775 << 16) | 0x10
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)             # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header data
    if compress_type is not None:
        zinfo.compress_type = compress_type
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        zinfo.flag_bits |= 0x02

    self._writecheck(zinfo)
    self._didModify = True

    zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum
    compressor = _get_compressor(zinfo.compress_type)
    if compressor:
        data = compressor.compress(data) + compressor.flush()
        zinfo.compress_size = len(data)     # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size

    zip64 = (zinfo.file_size > ZIP64_LIMIT or
             zinfo.compress_size > ZIP64_LIMIT)
    if zip64 and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")

    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.write(data)
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        if zip64:
            descriptor_format = '<LQQ'
        else:
            descriptor_format = '<LLL'
        self.fp.write(struct.pack(descriptor_format, zinfo.CRC,
                                  zinfo.compress_size, zinfo.file_size))
    self.fp.flush()
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1465
1466 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001467 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001468 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001469
def close(self):
    """Close the archive.

    When the archive was opened in mode "w" or "a" and has been
    modified, the central directory and the end-of-archive records are
    written first.  The underlying file object is closed unless it was
    passed in by the caller.  Calling close() on an already closed
    archive does nothing.

    Raises LargeZipFile if the ending records need ZIP64 and ZIP64 is
    not allowed.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:
            # ---- central directory: one record per member ----
            dir_start = self.fp.tell()
            for zinfo in self.filelist:
                year, month, day, hour, minute, second = zinfo.date_time[:6]
                dosdate = ((year - 1980) << 9) | (month << 5) | day
                dostime = (hour << 11) | (minute << 5) | (second // 2)

                # Values too large for the 32-bit fields are collected
                # here and replaced by the 0xffffffff marker.
                zip64_values = []
                if (zinfo.file_size > ZIP64_LIMIT
                        or zinfo.compress_size > ZIP64_LIMIT):
                    zip64_values.append(zinfo.file_size)
                    zip64_values.append(zinfo.compress_size)
                    file_size = compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    zip64_values.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                if zip64_values:
                    # Put a ZIP64 field in front of the member's extra data
                    count = len(zip64_values)
                    extra_data = struct.pack(
                        '<HH' + 'Q' * count,
                        1, 8 * count, *zip64_values) + zinfo.extra
                    min_version = ZIP64_VERSION
                else:
                    extra_data = zinfo.extra
                    min_version = 0

                if zinfo.compress_type == ZIP_BZIP2:
                    min_version = max(BZIP2_VERSION, min_version)
                elif zinfo.compress_type == ZIP_LZMA:
                    min_version = max(LZMA_VERSION, min_version)

                extract_version = max(min_version, zinfo.extract_version)
                create_version = max(min_version, zinfo.create_version)
                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(
                        structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the values that could not be packed, then
                    # let the warning propagate.
                    print((structCentralDir, stringCentralDir, create_version,
                           zinfo.create_system, extract_version,
                           zinfo.reserved, zinfo.flag_bits,
                           zinfo.compress_type, dostime, dosdate,
                           zinfo.CRC, compress_size, file_size,
                           len(zinfo.filename), len(extra_data),
                           len(zinfo.comment),
                           0, zinfo.internal_attr, zinfo.external_attr,
                           header_offset), file=sys.stderr)
                    raise
                for piece in (centdir, filename, extra_data, zinfo.comment):
                    self.fp.write(piece)

            dir_end = self.fp.tell()

            # ---- end-of-zip-archive record(s) ----
            centDirCount = len(self.filelist)
            centDirSize = dir_end - dir_start
            centDirOffset = dir_start

            if centDirCount > ZIP_FILECOUNT_LIMIT:
                requires_zip64 = "Files count"
            elif centDirOffset > ZIP64_LIMIT:
                requires_zip64 = "Central directory offset"
            elif centDirSize > ZIP64_LIMIT:
                requires_zip64 = "Central directory size"
            else:
                requires_zip64 = None

            if requires_zip64:
                # Need to write the ZIP64 end-of-archive records
                if not self._allowZip64:
                    raise LargeZipFile(requires_zip64 +
                                       " would require ZIP64 extensions")
                self.fp.write(struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset))
                self.fp.write(struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, dir_end, 1))
                # The classic record below only holds the capped values.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            self.fp.write(struct.pack(
                structEndArchive, stringEndArchive,
                0, 0, centDirCount, centDirCount,
                centDirSize, centDirOffset, len(self._comment)))
            self.fp.write(self._comment)
            self.fp.flush()
    finally:
        # Always drop our reference; only close files we opened ourselves.
        underlying = self.fp
        self.fp = None
        if not self._filePassed:
            underlying.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001583
1584
class PyZipFile(ZipFile):
    """ZipFile subclass for archiving Python modules and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember `optimize`.

        `optimize` selects which byte-code file writepy() archives:
        -1 uses whatever is already present (legacy behaviour), 0 uses
        the .pyc file, any other value the .pyo file.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add the Python code found at "pathname" to the ZIP archive.

        If pathname is a package directory (it contains __init__.py),
        the package and all its sub-packages are added recursively.  If
        it is a plain directory, the *.py files directly inside it are
        added.  Otherwise pathname must name a *.py file, which is added
        on its own.  Modules are archived as byte code (module.pyc or
        module.pyo), compiled on demand; if compilation fails the source
        file is archived instead.

        basename is the archive path prefix for the added entries.
        filterfunc, if given, is called with each path; a false result
        skips that file or directory.

        Raises RuntimeError if pathname is a file not ending in ".py".
        """
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s "%s" skipped by filterfunc' % (label, pathname))
            return

        if not os.path.isdir(pathname):
            # A single module given by its source file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for entry in os.listdir(pathname):
                if os.path.splitext(entry)[1] != ".py":
                    continue
                path = os.path.join(pathname, entry)
                if filterfunc and not filterfunc(path):
                    if self.debug:
                        print('file "%s" skipped by filterfunc' % path)
                    continue
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
            return

        # This is a package directory: extend the archive prefix with the
        # package name and add __init__ first.
        package = os.path.split(pathname)[1]
        basename = "%s/%s" % (basename, package) if basename else package
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)

        # Add all remaining *.py files and package subdirectories
        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        for entry in entries:
            path = os.path.join(pathname, entry)
            if os.path.isdir(path):
                # Only sub-directories that are packages are descended into.
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    self.writepy(path, basename, filterfunc=filterfunc)
                continue
            if os.path.splitext(entry)[1] != ".py":
                continue
            if filterfunc and not filterfunc(path):
                if self.debug:
                    print('file "%s" skipped by filterfunc' % path)
                continue
            fname, arcname = self._get_codename(path[0:-3], basename)
            if self.debug:
                print("Adding", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the module at `pathname`.

        `pathname` is the module path without its ".py" suffix.  The
        returned filename is the file to read from disk (compiled first
        if necessary; the source file itself when compilation fails) and
        archivename is the name to store it under, prefixed with
        `basename` when that is non-empty.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).
        """
        def compile_source(file, optimize=-1):
            # Byte-compile `file`; report failure instead of raising.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = importlib.util.cache_from_source(file_py, True)
        pycache_pyo = importlib.util.cache_from_source(file_py, False)

        def is_current(candidate):
            # True if `candidate` exists and is not older than the source.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present.  Each pair is
            # (file on disk, legacy name used inside the archive), tried
            # in order of preference.
            candidates = (
                (file_pyo, file_pyo),
                (file_pyc, file_pyc),
                (pycache_pyc, file_pyc),
                (pycache_pyo, file_pyo),
            )
            for on_disk, in_archive in candidates:
                if is_current(on_disk):
                    fname, arcname = on_disk, in_archive
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if compile_source(file_py):
                    if __debug__:
                        fname, arcname = pycache_pyc, file_pyc
                    else:
                        fname, arcname = pycache_pyo, file_pyo
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname, arcname = pycache_pyc, file_pyc
            else:
                fname, arcname = pycache_pyo, file_pyo
            if not is_current(fname):
                if not compile_source(file_py, optimize=self._optimize):
                    fname = arcname = file_py

        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001743
1744
def main(args=None):
    """Command-line entry point: list, test, extract or create a ZIP file.

    `args` defaults to sys.argv[1:].  The first argument selects the
    action (-l, -t, -e or -c).  On an unknown action or a wrong number
    of arguments the usage text is printed and the process exits with
    status 1.
    """
    import textwrap
    usage = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)

    def usage_exit():
        # Report incorrect invocation and stop.
        print(usage)
        sys.exit(1)

    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_exit()

    command = args[0]

    if command == '-l':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif command == '-t':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif command == '-e':
        if len(args) != 3:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif command == '-c':
        if len(args) < 3:
            usage_exit()

        def add_to_zip(zf, path, zippath):
            # Recursively add `path` under the archive name `zippath`;
            # anything that is neither a file nor a directory is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                if zippath:
                    zf.write(path, zippath)
                for entry in os.listdir(path):
                    add_to_zip(zf,
                               os.path.join(path, entry),
                               os.path.join(zippath, entry))

        with ZipFile(args[1], 'w') as zf:
            for path in args[2:]:
                zippath = os.path.basename(path)
                if not zippath:
                    # Path ended with a separator: use the directory name.
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                add_to_zip(zf, path, zippath)


if __name__ == "__main__":
    main()