blob: 74e1566c7d01d88b7c860a6cd3e01db3c22e8325 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Brett Cannonb57a0852013-06-15 17:32:30 -04009import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +000010import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
18try:
Tim Peterse1190062001-01-15 03:34:38 +000019 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000020 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040021except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000022 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000023 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020025try:
26 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040027except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020028 bz2 = None
29
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020030try:
31 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040032except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020033 lzma = None
34
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020035__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020036 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Georg Brandl4d540882010-10-28 06:42:33 +000037 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000038
class BadZipFile(Exception):
    """Error raised for archives this module cannot process.

    For example, it is raised when the ZIP64 locator reports an archive
    that spans multiple disks.
    """


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names
50
Guido van Rossum32abe6f2000-03-31 17:30:02 +000051
# Sizes above this limit need the ZIP64 extensions (see ZipInfo.FileHeader).
ZIP64_LIMIT = (1 << 31) - 1
# 0xFFFF, the largest value that fits in a 16-bit header field.
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "Version needed to extract" values written into headers.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020069
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Indices into the list produced by unpacking structEndArchive.
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9
93_ECD_LOCATION = 9
94
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
121
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Indices into the tuple produced by unpacking structFileHeader.
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
140
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Indices into the tuple produced by unpacking structEndArchive64.
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
162
def _check_zipfile(fp):
    """Return True if *fp* contains an "end of central directory" record.

    *fp* is a seekable binary file object.  An OSError raised while
    probing the file is treated as "not a ZIP file" and yields False.
    """
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except OSError:
        pass
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000170
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too: anything
    with a ``read`` attribute is probed directly, anything else is opened
    in binary mode first.  Returns False (never raises OSError) when the
    file cannot be opened or read.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except OSError:
        pass
    return result
186
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin   -- seekable binary file object.
    offset -- position of the classic "end of central directory" record,
              expressed as a (negative) offset from the end of the file.
    endrec -- list built by _EndRecData(); it is updated in place and
              also returned.

    endrec is returned unchanged when no valid ZIP64 locator/record is
    found in front of the classic record.  Raises BadZipFile when the
    locator describes an archive spanning multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
228
229
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of ten items: the eight fields unpacked from the
    ZIP "End of central dir" record, then the archive comment (bytes),
    then the file seek offset of this record.  The list is passed through
    _EndRecData64(), so values from a ZIP64 record override the classic
    ones when such a record is present.

    None is returned when no record can be located or the file is too
    short to hold one.
    """

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000289
Fred Drake484d7352000-10-02 21:14:52 +0000290
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980, 1, 1, 0, 0, 0)):
        """Create the description of one archive member.

        filename  -- member name; it is cut at the first null character and
                     os.sep is replaced by "/".
        date_time -- (year, month, day, hour, min, sec) tuple.

        Raises ValueError when the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED  # Type of compression for the file
        self.comment = b""               # Comment for each file
        self.extra = b""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0       # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3       # System which created ZIP archive
        self.create_version = DEFAULT_VERSION   # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION  # Version needed to extract archive
        self.reserved = 0                # Must be zero
        self.flag_bits = 0               # ZIP flag bits
        self.volume = 0                  # Volume number of file header
        self.internal_attr = 0           # Internal attributes
        self.external_attr = 0           # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Return a short description; safe on partially populated objects."""
        # file_size / compress_size are slots that __init__ does not fill,
        # so read them defensively: repr() must not raise AttributeError
        # on a freshly constructed ZipInfo.
        file_size = getattr(self, 'file_size', 0)
        compress_size = getattr(self, 'compress_size', 0)

        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.filename[-1:] == '/'
        if not isdir or file_size:
            result.append(' file_size=%r' % file_size)
        if ((not isdir or compress_size) and
                (self.compress_type != ZIP_STORED or
                 file_size != compress_size)):
            result.append(' compress_size=%r' % compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file (local) header as bytes.

        zip64 -- True to always append the ZIP64 extra record, False to
                 forbid it, None to decide from the sizes.

        Raises LargeZipFile when a size exceeds ZIP64_LIMIT while zip64 is
        false.  As a side effect, extract_version and create_version are
        raised to the minimum the chosen features require.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # The extra record carries the real 64-bit sizes.
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4,
                                        file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        ASCII is used when possible; otherwise the name is encoded as
        UTF-8 and bit 0x800 is added to the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 records (type 1) found in the extra field.

        Each size/offset attribute currently holding its "all ones"
        placeholder is replaced by the next 64-bit value of the record.
        Raises RuntimeError for a type 1 record of unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            # Skip this record: 4 header bytes plus its payload.
            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000467
468
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)

    Both the password items and the cipher characters are integers
    (e.g. obtained by iterating over bytes objects).
    """

    @staticmethod
    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = [0] * 256
        for i in range(256):
            crc = i
            for j in range(8):
                if crc & 1:
                    crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
                else:
                    crc = ((crc >> 1) & 0x7FFFFFFF)
            table[i] = crc
        return table

    # Built on first instantiation and then shared by all instances.
    crctable = None

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]

    def __init__(self, pwd):
        """Initialise the three keys and mix in every item of *pwd*."""
        if _ZipDecrypter.crctable is None:
            _ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable()
        self.key0 = 305419896
        self.key1 = 591751049
        self.key2 = 878082192
        for p in pwd:
            self._UpdateKeys(p)

    def _UpdateKeys(self, c):
        """Advance the key state with one integer byte value *c*."""
        self.key0 = self._crc32(c, self.key0)
        self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
        self.key1 = (self.key1 * 134775813 + 1) & 4294967295
        self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        c = c ^ (((k * (k^1)) >> 8) & 255)
        # The key state is advanced with the decrypted value.
        self._UpdateKeys(c)
        return c
528
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200529
class LZMACompressor:
    """Raw LZMA1 compressor that prefixes its output with a small header.

    The header is packed as '<BBH' (the bytes 9 and 4, then the length of
    the encoded filter properties) followed by the properties themselves.
    It is emitted once, by the first call to compress() or flush().
    NOTE(review): 9 and 4 are presumably the LZMA SDK version bytes from
    the ZIP specification -- confirm against APPNOTE.
    """

    def __init__(self):
        # The underlying compressor is created lazily, see _init().
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the stream header."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
            lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        ])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def _header(self):
        """Return the header on first use, b'' afterwards."""
        if self._comp is None:
            return self._init()
        return b''

    def compress(self, data):
        """Compress *data*; the first output is preceded by the header."""
        return self._header() + self._comp.compress(data)

    def flush(self):
        """Finish the stream (emitting the header first if still pending)."""
        return self._header() + self._comp.flush()
551
552
class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by a properties header.

    Input is buffered until the 4-byte header (whose last two bytes give
    the properties size as a little-endian unsigned short) and the
    properties that follow it are complete; only then is the underlying
    raw LZMA decompressor created.
    """

    def __init__(self):
        self._decomp = None       # created once the header has been read
        self._unconsumed = b''    # input buffered while waiting for header
        self.eof = False

    def decompress(self, data):
        """Return the bytes decoded from *data* (b'' while still buffering)."""
        if self._decomp is None:
            self._unconsumed += data
            if len(self._unconsumed) <= 4:
                return b''
            psize, = struct.unpack('<H', self._unconsumed[2:4])
            if len(self._unconsumed) <= 4 + psize:
                return b''

            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
                lzma._decode_filter_properties(lzma.FILTER_LZMA1,
                                               self._unconsumed[4:4 + psize])
            ])
            # Whatever follows the header is the start of the payload.
            data = self._unconsumed[4 + psize:]
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
579
580
# Display names for compression method numbers.  Used for repr() output
# and for error messages about unsupported methods.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
600
def _check_compression(compression):
    """Validate that *compression* can be used in this interpreter.

    Returns None on success.  Raises RuntimeError when the method is not
    one of ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2 or ZIP_LZMA, or when the
    module implementing it (zlib, bz2, lzma) could not be imported.
    """
    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
    else:
        raise RuntimeError("That compression method is not supported")
618
619
def _get_compressor(compress_type):
    """Return a new compressor object for *compress_type*.

    None is returned for every type other than ZIP_DEFLATED, ZIP_BZIP2
    and ZIP_LZMA (this includes ZIP_STORED).
    """
    if compress_type == ZIP_DEFLATED:
        # Negative wbits selects a raw deflate stream (no zlib header).
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    elif compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    elif compress_type == ZIP_LZMA:
        return LZMACompressor()
    else:
        return None
630
631
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    None is returned for ZIP_STORED.  Raises NotImplementedError for any
    method other than stored, deflate, bzip2 and lzma; the message
    includes the method's name when compressor_names knows it.
    """
    if compress_type == ZIP_STORED:
        return None
    elif compress_type == ZIP_DEFLATED:
        # Negative wbits: the data is a raw deflate stream.
        return zlib.decompressobj(-15)
    elif compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    elif compress_type == ZIP_LZMA:
        return LZMADecompressor()
    else:
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
        else:
            raise NotImplementedError("compression type %d" % (compress_type,))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200647
648
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.

    Is returned by ZipFile.open().  Compressed bytes are pulled from the
    underlying file object in blocks, optionally decrypted, decompressed
    according to the member's compression type, and checked against the
    expected CRC-32 once the end of the member has been reached.
    """

    # Max size supported by decompressor.  The parentheses are required:
    # "-" binds tighter than "<<", so "1 << 31 - 1" is really "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Prepare to read one member.

        fileobj: file object positioned at the start of the member's data.
        mode: open mode; universal-newline handling is on if it has 'U'.
        zipinfo: object supplying compress_type, compress_size, file_size,
            filename and (optionally) CRC for the member.
        decrypter: optional callable applied to every byte read.
        close_fileobj: if true, close() also closes fileobj.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        # Compressed bytes still to be read from the file.
        self._compress_left = zipinfo.compress_size
        # Uncompressed bytes still to be handed to the caller.
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            # Without a reference value the CRC is never computed.
            self._expected_crc = None

    def __repr__(self):
        """Return a short description: name, mode and compression type."""
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        In universal-newline mode every line ending is returned as b'\\n'
        and the endings seen are recorded in self.newlines.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Push the bytes just read back in front of the buffer so the
            # logical position is unchanged.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: the object always supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # The request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        # Drain the buffer, then keep reading until n more bytes or EOF.
        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # More than requested came back; keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        """Fold newdata into the running CRC and verify it at EOF.

        Raises BadZipFile when the end of the member has been reached and
        the computed CRC-32 differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            # Loop only until the first non-empty block is produced.
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    # Keep the surplus buffered for the next call.
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        """Return the next block of decrypted, decompressed member data.

        Updates the EOF flag, the count of uncompressed bytes left and the
        running CRC.  Returns b'' once EOF was reached or if n <= 0.
        """
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        """Read and decrypt up to n raw (still compressed) bytes.

        At least MIN_READ_SIZE bytes are requested, but never more than the
        compressed bytes remaining.  Raises EOFError if the underlying file
        ends before the member's compressed data is complete.
        """
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = bytes(map(self._decrypter, data))
        return data

    def close(self):
        """Close the member, and the underlying file if this object owns it."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000914
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000915
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000916class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000917 """ Class with methods to open, read, write, close, list zip files.
918
Serhiy Storchaka235c5e02013-11-23 15:55:38 +0200919 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +0000920
Fred Drake3d9091e2001-03-26 15:49:24 +0000921 file: Either the path to the file, or a file-like object.
922 If it is a path, the file will be opened and closed by ZipFile.
923 mode: The mode can be either read "r", write "w" or append "a".
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200924 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
925 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000926 allowZip64: if True ZipFile will create files with ZIP64 extensions when
927 needed, otherwise it will raise an exception when this would
928 be necessary.
929
Fred Drake3d9091e2001-03-26 15:49:24 +0000930 """
Fred Drake484d7352000-10-02 21:14:52 +0000931
Fred Drake90eac282001-02-28 05:29:34 +0000932 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -0800933 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +0000934
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file: a path string (opened and owned by this object) or an already
        open file-like object (used as is).
    mode: "r", "w" or "a"; anything else raises RuntimeError.
    compression: compression method, validated by _check_compression().
    allowZip64: stored on the instance as _allowZip64.

    If anything fails after the file has been obtained, self.fp is reset
    to None and a file opened here is closed before the exception is
    re-raised.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    # self.mode keeps this value even if the 'a' -> 'w' fallback below
    # rebinds the local names "mode" and "key".
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self._comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = io.open(file, modeDict[mode])
        except OSError:
            # Opening with 'r+b' failed for append mode: retry as a
            # plain write so the file is opened with 'wb' instead.
            if mode == 'a':
                mode = key = 'w'
                self.fp = io.open(file, modeDict[mode])
            else:
                raise
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    try:
        if key == 'r':
            self._RealGetContents()
        elif key == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        elif key == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
        else:
            # NOTE(review): looks unreachable, since mode was already
            # validated at the top of this method.
            raise RuntimeError('Mode must be "r", "w" or "a"')
    except:
        # Bare except on purpose: clean up on any exception, then re-raise.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000999
def __enter__(self):
    """Enter a "with" block; the archive object itself is the target."""
    return self
1002
def __exit__(self, type, value, traceback):
    """Leave a "with" block by closing the archive.

    Nothing is returned, so an exception raised inside the block is not
    suppressed.
    """
    self.close()
1005
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001006 def __repr__(self):
1007 result = ['<%s.%s' % (self.__class__.__module__,
1008 self.__class__.__qualname__)]
1009 if self.fp is not None:
1010 if self._filePassed:
1011 result.append(' file=%r' % self.fp)
1012 elif self.filename is not None:
1013 result.append(' filename=%r' % self.filename)
1014 result.append(' mode=%r' % self.mode)
1015 else:
1016 result.append(' [closed]')
1017 result.append('>')
1018 return ''.join(result)
1019
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record, reads the whole central
    directory into memory and creates one ZipInfo per entry, appending it
    to self.filelist and registering it in self.NameToInfo.  Also sets
    self._comment and self.start_dir.

    Raises BadZipFile if the end record cannot be found or a directory
    entry is truncated or has a bad signature, and NotImplementedError if
    an entry needs a newer extract version than MAX_EXTRACT_VERSION.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # From here on "fp" is an in-memory copy of the central directory,
    # not the archive file itself.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if len(centdir) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Index 5 is the field unpacked into x.flag_bits further down.
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        # The raw time word is kept as well; open() uses it for the
        # password check byte.
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                        t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        # Shift the stored offset by "concat" (non-zero when the archive
        # was appended to other data).
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001096
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001097
def namelist(self):
    """Return a new list with the file name of every archive member,
    in the order the members appear in self.filelist."""
    return [zinfo.filename for zinfo in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001101
def infolist(self):
    """Return a list of class ZipInfo instances for files in the
    archive.

    The list returned is self.filelist itself, not a copy.
    """
    return self.filelist
1106
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header line is written, then one line per member with its name,
    modification date/time and uncompressed size.  `file' is passed
    straight to print().
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001115
def testzip(self):
    """Read all the files and check the CRC.

    Returns the name of the first member whose read raises BadZipFile,
    or None if every member could be read to the end.
    """
    block_size = 2 ** 20
    for zinfo in self.filelist:
        try:
            # Read by chunks, to avoid an OverflowError or a
            # MemoryError with very large embedded files.
            with self.open(zinfo.filename, "r") as member:
                while True:
                    # Reading to the end is what triggers the CRC-32 check.
                    if not member.read(block_size):
                        break
        except BadZipFile:
            return zinfo.filename
    return None
1128
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError if the archive has no member with that name.
    """
    info = self.NameToInfo.get(name)
    if info is not None:
        return info

    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001137
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A non-empty pwd must be bytes, otherwise TypeError is raised.  Any
    empty value (None, b'') clears the default password.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001146
@property
def comment(self):
    """The comment text associated with the ZIP file.

    Returns the value stored in self._comment.
    """
    return self._comment
1151
@comment.setter
def comment(self, comment):
    # Store a new archive comment and mark the archive as modified.
    # Non-bytes values raise TypeError; a comment longer than
    # ZIP_MAX_COMMENT is truncated after issuing a warning.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment))
    # check for valid comment length
    limit = ZIP_MAX_COMMENT
    if len(comment) > limit:
        import warnings
        message = 'Archive comment is too long; truncating to %d bytes' % limit
        warnings.warn(message, stacklevel=2)
        comment = comment[:limit]
    self._comment = comment
    self._didModify = True
1164
def read(self, name, pwd=None):
    """Return the complete contents of member 'name' as bytes.

    The member is opened with self.open(name, "r", pwd), read to the end
    and closed again.
    """
    with self.open(name, "r", pwd) as fp:
        return fp.read()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001169
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name: member name or ZipInfo object.
    mode: "r", "U" or "rU" ('U' is deprecated and issues a warning).
    pwd: password (bytes) for an encrypted member; falls back to the
        default set on the archive.

    Returns a ZipExtFile positioned at the start of the member's data.

    Raises RuntimeError for an invalid mode, a closed archive, a missing
    password or a bad password; TypeError if pwd is not bytes; BadZipFile
    for a truncated or inconsistent local file header; NotImplementedError
    for compressed patched data or strong encryption.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if 'U' in mode:
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, fheader)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            # The extra field is read only to move past it.
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, password "
                                   "required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = list(map(zd, header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file time from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                # Format the name into the message; passing it as a
                # second argument made str(exc) render as a tuple.
                raise RuntimeError("Bad password for file %r" % (name,))

        return ZipExtFile(zef_file, mode, zinfo, zd,
                          close_fileobj=not self._filePassed)
    except:
        # Bare except on purpose: a file opened here must be closed on any
        # failure before the exception is re-raised.
        if not self._filePassed:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001265
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
    using its full name. Its file information is extracted as accurately
    as possible. `member' may be a filename or a ZipInfo object. You can
    specify a different directory using `path'.

    Returns whatever self._extract_member() returns for the member.
    """
    # Resolve the member first, then the destination (same order as the
    # lookups have always been performed in).
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
1279
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
    directory. `path' specifies a different directory to extract to.
    `members' is optional and must be a subset of the list returned
    by namelist().
    """
    targets = self.namelist() if members is None else members
    for entry in targets:
        self.extract(entry, path, pwd)
1291
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001292 @classmethod
1293 def _sanitize_windows_name(cls, arcname, pathsep):
1294 """Replace bad characters and remove trailing dots from parts."""
1295 table = cls._windows_illegal_name_trans_table
1296 if not table:
1297 illegal = ':<>|"?*'
1298 table = str.maketrans(illegal, '_' * len(illegal))
1299 cls._windows_illegal_name_trans_table = table
1300 arcname = arcname.translate(table)
1301 # remove trailing dots
1302 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1303 # rejoin, removing empty parts.
1304 arcname = pathsep.join(x for x in arcname if x)
1305 return arcname
1306
Christian Heimes790c8232008-01-07 21:14:23 +00001307 def _extract_member(self, member, targetpath, pwd):
1308 """Extract the ZipInfo object 'member' to a physical
1309 file on the path targetpath.
1310 """
1311 # build the destination pathname, replacing
1312 # forward slashes to platform specific separators.
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001313 arcname = member.filename.replace('/', os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001314
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001315 if os.path.altsep:
1316 arcname = arcname.replace(os.path.altsep, os.path.sep)
1317 # interpret absolute pathname as relative, remove drive letter or
1318 # UNC path, redundant separators, "." and ".." components.
1319 arcname = os.path.splitdrive(arcname)[1]
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001320 invalid_path_parts = ('', os.path.curdir, os.path.pardir)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001321 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001322 if x not in invalid_path_parts)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001323 if os.path.sep == '\\':
Serhiy Storchakae5e64442013-02-02 19:50:59 +02001324 # filter illegal characters on Windows
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001325 arcname = self._sanitize_windows_name(arcname, os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001326
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001327 targetpath = os.path.join(targetpath, arcname)
Christian Heimes790c8232008-01-07 21:14:23 +00001328 targetpath = os.path.normpath(targetpath)
1329
1330 # Create all upper directories if necessary.
1331 upperdirs = os.path.dirname(targetpath)
1332 if upperdirs and not os.path.exists(upperdirs):
1333 os.makedirs(upperdirs)
1334
Martin v. Löwis59e47792009-01-24 14:10:07 +00001335 if member.filename[-1] == '/':
Martin v. Löwis70ccd162009-05-24 19:47:22 +00001336 if not os.path.isdir(targetpath):
1337 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001338 return targetpath
1339
Antoine Pitrou17babc52012-11-17 23:50:08 +01001340 with self.open(member, pwd=pwd) as source, \
1341 open(targetpath, "wb") as target:
1342 shutil.copyfileobj(source, target)
Christian Heimes790c8232008-01-07 21:14:23 +00001343
1344 return targetpath
1345
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001346 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001347 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001348 if zinfo.filename in self.NameToInfo:
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001349 import warnings
1350 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001351 if self.mode not in ("w", "a"):
Collin Winterce36ad82007-08-30 01:19:48 +00001352 raise RuntimeError('write() requires mode "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001353 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001354 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02001355 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001356 _check_compression(zinfo.compress_type)
Serhiy Storchakacfbb3942014-09-23 21:34:24 +03001357 if not self._allowZip64:
1358 requires_zip64 = None
1359 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1360 requires_zip64 = "Files count"
1361 elif zinfo.file_size > ZIP64_LIMIT:
1362 requires_zip64 = "Filesize"
1363 elif zinfo.header_offset > ZIP64_LIMIT:
1364 requires_zip64 = "Zipfile size"
1365 if requires_zip64:
1366 raise LargeZipFile(requires_zip64 +
1367 " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001368
def write(self, filename, arcname=None, compress_type=None):
    """Add the file or directory 'filename' to the archive under the
    name 'arcname'.

    'arcname' defaults to 'filename'; its drive and leading separators
    are removed.  'compress_type' overrides the archive's default
    compression for this member.  A directory is stored as an empty
    entry whose name ends in '/'.  RuntimeError is raised when the
    archive is already closed, or when the data turns out to need
    ZIP64 sizes after a non-ZIP64 header has been written.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    date_time = time.localtime(st.st_mtime)[0:6]

    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'

    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st.st_mode & 0xFFFF) << 16   # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)
    zinfo.file_size = st.st_size
    # LZMA compressed data includes an end-of-stream (EOS) marker,
    # which is signalled with flag bit 0x02.
    zinfo.flag_bits = 0x02 if zinfo.compress_type == ZIP_LZMA else 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        zinfo.external_attr |= 0x10  # MS-DOS directory flag
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        return

    compressor = _get_compressor(zinfo.compress_type)
    crc = 0
    raw_size = 0
    packed_size = 0
    with open(filename, "rb") as src:
        # Placeholder CRC and sizes; the header is rewritten with the
        # real values once all data has been processed.
        zinfo.CRC = 0
        zinfo.compress_size = 0
        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
            zinfo.file_size * 1.05 > ZIP64_LIMIT
        self.fp.write(zinfo.FileHeader(zip64))
        for chunk in iter(lambda: src.read(1024 * 8), b''):
            raw_size += len(chunk)
            crc = crc32(chunk, crc) & 0xffffffff
            if compressor:
                chunk = compressor.compress(chunk)
                packed_size += len(chunk)
            self.fp.write(chunk)

    if compressor:
        tail = compressor.flush()
        packed_size += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed_size
    else:
        zinfo.compress_size = raw_size
    zinfo.CRC = crc
    zinfo.file_size = raw_size

    if not zip64 and self._allowZip64:
        if raw_size > ZIP64_LIMIT:
            raise RuntimeError('File size has increased during compressing')
        if packed_size > ZIP64_LIMIT:
            raise RuntimeError('Compressed size larger than uncompressed size')

    # Seek backwards and write file header (which will now include
    # correct CRC and file sizes), then return to the end of the data.
    resume_at = self.fp.tell()
    self.fp.seek(zinfo.header_offset, 0)
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.seek(resume_at, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1457
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Store 'data' as a member of the archive.

    'data' may be a 'str' or a 'bytes' instance; a 'str' is encoded as
    UTF-8 first.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name the member gets in the archive.  'compress_type', when
    given, overrides the compression recorded for the member.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")

    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        # Only a name was supplied: build the entry with the current
        # time, the archive's default compression and default modes.
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        if zinfo.filename[-1] == '/':
            # drwxrwxr-x plus the MS-DOS directory flag
            zinfo.external_attr = (0o40775 << 16) | 0x10
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)             # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header data
    if compress_type is not None:
        zinfo.compress_type = compress_type
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        zinfo.flag_bits |= 0x02

    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum

    compressor = _get_compressor(zinfo.compress_type)
    if compressor:
        data = compressor.compress(data) + compressor.flush()
        zinfo.compress_size = len(data)     # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size

    zip64 = max(zinfo.file_size, zinfo.compress_size) > ZIP64_LIMIT
    if zip64 and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")

    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.write(data)
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        descriptor_fmt = '<LQQ' if zip64 else '<LLL'
        descriptor = struct.pack(descriptor_fmt, zinfo.CRC,
                                 zinfo.compress_size, zinfo.file_size)
        self.fp.write(descriptor)
    self.fp.flush()
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1513
def __del__(self):
    """Call the "close()" method in case the user forgot."""
    # close() returns immediately when self.fp is None, so this is
    # harmless for an archive that has already been closed.
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001517
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Nothing happens when the archive is already closed.  If the
    archive was modified, one central directory entry is written per
    member of self.filelist, followed by the end-of-archive record and
    the archive comment; ZIP64 end records are written first when the
    member count, directory offset or directory size exceeds its
    limit.  LargeZipFile is raised in that case if ZIP64 is not
    allowed.  self.fp is always reset to None, and the underlying file
    is closed unless it was passed in by the caller.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:
            dir_start = self.fp.tell()

            # write central directory
            for zinfo in self.filelist:
                stamp = zinfo.date_time
                dosdate = ((stamp[0] - 1980) << 9) | (stamp[1] << 5) | stamp[2]
                dostime = (stamp[3] << 11) | (stamp[4] << 5) | (stamp[5] // 2)

                # Values that do not fit the 32-bit fields are moved
                # into a ZIP64 extra field and replaced by 0xffffffff.
                zip64_values = []
                if (zinfo.file_size > ZIP64_LIMIT
                        or zinfo.compress_size > ZIP64_LIMIT):
                    zip64_values += [zinfo.file_size, zinfo.compress_size]
                    file_size = compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    zip64_values.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                min_version = 0
                if zip64_values:
                    # Put a ZIP64 field in front of the member's extras
                    zip64_field = struct.pack(
                        '<HH' + 'Q' * len(zip64_values),
                        1, 8 * len(zip64_values), *zip64_values)
                    extra_data = zip64_field + extra_data
                    min_version = ZIP64_VERSION

                if zinfo.compress_type == ZIP_BZIP2:
                    min_version = max(BZIP2_VERSION, min_version)
                elif zinfo.compress_type == ZIP_LZMA:
                    min_version = max(LZMA_VERSION, min_version)

                extract_version = max(min_version, zinfo.extract_version)
                create_version = max(min_version, zinfo.create_version)
                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(
                        structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the values that were being packed, then
                    # let the warning propagate.
                    print((structCentralDir, stringCentralDir, create_version,
                           zinfo.create_system, extract_version, zinfo.reserved,
                           zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                           zinfo.CRC, compress_size, file_size,
                           len(zinfo.filename), len(extra_data), len(zinfo.comment),
                           0, zinfo.internal_attr, zinfo.external_attr,
                           header_offset), file=sys.stderr)
                    raise
                for piece in (centdir, filename, extra_data, zinfo.comment):
                    self.fp.write(piece)

            dir_end = self.fp.tell()

            # Write end-of-zip-archive record
            entry_count = len(self.filelist)
            dir_size = dir_end - dir_start
            dir_offset = dir_start
            if entry_count > ZIP_FILECOUNT_LIMIT:
                zip64_reason = "Files count"
            elif dir_offset > ZIP64_LIMIT:
                zip64_reason = "Central directory offset"
            elif dir_size > ZIP64_LIMIT:
                zip64_reason = "Central directory size"
            else:
                zip64_reason = None

            if zip64_reason:
                # Need to write the ZIP64 end-of-archive records
                if not self._allowZip64:
                    raise LargeZipFile(zip64_reason +
                                       " would require ZIP64 extensions")
                self.fp.write(struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, entry_count, entry_count,
                    dir_size, dir_offset))
                self.fp.write(struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, dir_end, 1))
                # The classic record only holds the clamped values.
                entry_count = min(entry_count, 0xFFFF)
                dir_size = min(dir_size, 0xFFFFFFFF)
                dir_offset = min(dir_offset, 0xFFFFFFFF)

            self.fp.write(struct.pack(
                structEndArchive, stringEndArchive,
                0, 0, entry_count, entry_count,
                dir_size, dir_offset, len(self._comment)))
            self.fp.write(self._comment)
            self.fp.flush()
    finally:
        # Mark the archive closed even when writing the records failed.
        handle = self.fp
        self.fp = None
        if not self._filePassed:
            handle.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001631
1632
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember 'optimize'.

        An 'optimize' of -1 selects the legacy behaviour of
        _get_codename(); any other value is the optimization level
        used when picking or compiling the byte-code file.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory (it holds an __init__.py),
        the package and its sub-packages are added recursively under
        "basename/<package name>".  If pathname is a plain directory,
        the *.py files directly inside it are added.  Otherwise
        pathname must be a single *.py file.  For every module the
        file chosen by _get_codename() is stored.

        If filterfunc is given, it is called with pathname and with
        the path of each *.py file found while listing a directory;
        a false result means that path is skipped.
        """
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s "%s" skipped by filterfunc' % (label, pathname))
            return

        def add_module(path, base):
            # Store one *.py file found while listing a directory,
            # unless filterfunc rejects it.
            if filterfunc and not filterfunc(path):
                if self.debug:
                    print('file "%s" skipped by filterfunc' % path)
                return
            fname, arcname = self._get_codename(path[0:-3], base)
            if self.debug:
                print("Adding", arcname)
            self.write(fname, arcname)

        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module given by file name.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                ext = os.path.splitext(filename)[1]
                if ext == ".py":
                    add_module(os.path.join(pathname, filename), basename)
            return

        # This is a package directory, add it
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)

        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in entries:
            path = os.path.join(pathname, filename)
            ext = os.path.splitext(filename)[1]
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # Sub-package: recurse with the extended basename.
                    self.writepy(path, basename, filterfunc=filterfunc)
            elif ext == ".py":
                add_module(path, basename)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        'pathname' is a module path without its ".py" suffix.  The
        result names the file to read and the name to store it under
        (prefixed with "basename/" when basename is non-empty).  An
        up-to-date byte-code file is preferred; otherwise the source
        is compiled, and if that fails the ".py" file itself is used.
        """
        def _compile(file, optimize=-1):
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = importlib.util.cache_from_source(file_py, True)
        pycache_pyo = importlib.util.cache_from_source(file_py, False)

        def is_current(candidate):
            # True when 'candidate' exists and is at least as new as
            # the source file.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present.  Files from
            # __pycache__ are stored under the legacy .pyc/.pyo name.
            candidates = (
                (file_pyo, file_pyo),
                (file_pyc, file_pyc),
                (pycache_pyc, file_pyc),
                (pycache_pyo, file_pyo),
            )
            for fname, arcname in candidates:
                if is_current(fname):
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    fname = (pycache_pyc if __debug__ else pycache_pyo)
                    arcname = (file_pyc if __debug__ else file_pyo)
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname, arcname = pycache_pyc, file_pyc
            else:
                fname, arcname = pycache_pyo, file_pyo
            if not is_current(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py

        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001791
1792
def main(args=None):
    """Run the command-line interface.

    'args' defaults to sys.argv[1:].  The first argument selects the
    action: -l lists an archive, -t tests it, -e extracts it into a
    target directory and -c creates it from the given sources.  For an
    unknown option or a wrong number of arguments the usage text is
    printed and sys.exit(1) is called.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)

    def usage_exit():
        print(USAGE)
        sys.exit(1)

    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_exit()

    option = args[0]

    if option == '-l':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif option == '-t':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif option == '-e':
        if len(args) != 3:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif option == '-c':
        if len(args) < 3:
            usage_exit()

        def addToZip(zf, path, zippath):
            # Files are stored deflated; directories get their own
            # entry (when they have a name) and are then walked.
            # Anything else is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                if zippath:
                    zf.write(path, zippath)
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))

        with ZipFile(args[1], 'w') as zf:
            for path in args[2:]:
                # Name the top-level entry after the last path
                # component; a trailing separator leaves basename()
                # empty, so fall back to the directory part.
                zippath = os.path.basename(path)
                if not zippath:
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                addToZip(zf, path, zippath)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001858
# Run the command-line interface when this file is executed as a script;
# main() reads its arguments from sys.argv[1:] when called without any.
if __name__ == "__main__":
    main()