blob: a1f5b1809ab22da8d35366f937cf9196f21d5e2b [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Brett Cannonb57a0852013-06-15 17:32:30 -04009import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +000010import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
18try:
Tim Peterse1190062001-01-15 03:34:38 +000019 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000020 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040021except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000022 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000023 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020025try:
26 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040027except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020028 bz2 = None
29
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020030try:
31 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040032except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020033 lzma = None
34
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020035__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020036 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Georg Brandl4d540882010-10-28 06:42:33 +000037 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000038
class BadZipFile(Exception):
    """
    Raised when a ZIP archive is invalid or cannot be handled by this
    module (for example, an archive that spans multiple disks).
    """


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


# Pre-3.2 compatibility names
BadZipfile = BadZipFile
error = BadZipFile
50
Guido van Rossum32abe6f2000-03-31 17:30:02 +000051
# Sizes above ZIP64_LIMIT need the ZIP64 extensions; the other two limits
# are the largest values that fit in an unsigned 16-bit field.
ZIP64_LIMIT = 0x7FFFFFFF
ZIP_FILECOUNT_LIMIT = 0xFFFF
ZIP_MAX_COMMENT = 0xFFFF

# constants for Zip file compression methods
# (other ZIP compression methods are not supported)
ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2, ZIP_LZMA = 0, 8, 12, 14

# Version numbers written into / compared against archive headers.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020069
Martin v. Löwisb09b8442008-07-03 14:13:42 +000070# Below are some formats and associated data for reading/writing headers using
71# the struct module. The names and structures of headers/records are those used
72# in the PKWARE description of the ZIP file format:
73# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
74# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000075
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the individual fields in the unpacked record.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE,
 # These last two indices are not part of the structure as defined in the
 # spec, but they are used internally by this module as a convenience
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)
94
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)
121
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the individual fields in the unpacked header.
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
140
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the individual fields in the unpacked ZIP64 record.
(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
162
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An OSError raised while looking for the record is treated as
    "not a ZIP file" and yields False.
    """
    try:
        # A truthy result means the file has the correct magic number.
        found = bool(_EndRecData(fp))
    except OSError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000170
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too; anything
    with a "read" attribute is used directly, anything else is opened in
    binary mode.  An OSError at any point yields whatever result had been
    established so far (False unless the check already succeeded).
    """
    is_zip = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as stream:
                is_zip = _check_zipfile(stream)
        else:
            is_zip = _check_zipfile(fp=filename)
    except OSError:
        pass
    return is_zip
186
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    *offset* is the (negative) position of the classic end record relative
    to the end of the file.  *endrec* is returned unchanged when no valid
    ZIP64 locator/record pair precedes that position; otherwise its disk,
    entry-count, size and offset items are overwritten in place.

    Raises BadZipFile for archives that span multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    magic, first_disk, _rel_offset, disk_total = struct.unpack(
        structEndArchive64Locator, locator)
    if magic != stringEndArchive64Locator:
        return endrec

    if not (first_disk == 0 and disk_total == 1):
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    fields = struct.unpack(structEndArchive64, record)
    if fields[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = fields[_CD64_SIGNATURE]
    endrec[_ECD_DISK_NUMBER] = fields[_CD64_DISK_NUMBER]
    endrec[_ECD_DISK_START] = fields[_CD64_DISK_NUMBER_START]
    endrec[_ECD_ENTRIES_THIS_DISK] = fields[_CD64_NUMBER_ENTRIES_THIS_DISK]
    endrec[_ECD_ENTRIES_TOTAL] = fields[_CD64_NUMBER_ENTRIES_TOTAL]
    endrec[_ECD_SIZE] = fields[_CD64_DIRECTORY_SIZE]
    endrec[_ECD_OFFSET] = fields[_CD64_OFFSET_START_CENTDIR]
    return endrec
228
229
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    total_size = fpin.tell()

    # Fast path: a ZIP file with no archive comment ends with the
    # "end of central directory" structure itself.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    no_comment = (len(tail) == sizeEndCentDir
                  and tail[:4] == stringEndArchive
                  and tail[-2:] == b"\000\000")
    if no_comment:
        # the signature is correct and there's no comment, unpack structure
        record = list(struct.unpack(structEndArchive, tail))

        # Append a blank comment and record start offset
        record.append(b"")
        record.append(total_size - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, record)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    search_from = max(total_size - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(search_from, 0)
    tail = fpin.read()
    sig_at = tail.rfind(stringEndArchive)
    if sig_at < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    body_end = sig_at + sizeEndCentDir
    raw = tail[sig_at:body_end]
    if len(raw) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    record = list(struct.unpack(structEndArchive, raw))
    declared = record[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    record.append(tail[body_end:body_end + declared])
    record.append(search_from + sig_at)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, search_from + sig_at - total_size, record)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000289
Fred Drake484d7352000-10-02 21:14:52 +0000290
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Describe one archive member.

        filename  -- member name; it is cut at the first null byte and, when
                     os.sep is not "/", os.sep is replaced by "/".
        date_time -- (year, month, day, hour, min, sec) tuple.

        Raises ValueError if the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Return a short description: name, compression, mode and sizes."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The upper 16 bits are shown as a file mode, the lower 16 as raw hex.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        # Sizes are omitted for directory entries (names ending in "/")
        # unless they are non-zero.
        isdir = self.filename[-1:] == '/'
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        zip64 -- None: add the ZIP64 extra record only when a size exceeds
                 ZIP64_LIMIT; true: always add it; false: never add it.

        Raises LargeZipFile if a size exceeds ZIP64_LIMIT while zip64 is
        false.  As a side effect, extract_version and create_version are
        raised to the minimum required by ZIP64 and the compression type.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        The name is encoded as ASCII when possible; otherwise as UTF-8 with
        bit 0x800 added to the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 record (type 1) found in the extra field.

        file_size, compress_size and header_offset are replaced, in that
        order, by successive 64-bit values from the record whenever they
        currently hold the 0xFFFFFFFF placeholder.  Records of any other
        type are skipped.

        Raises RuntimeError if a ZIP64 record has an unexpected length, is
        truncated, or holds fewer values than the placeholders require.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    nvalues = 3
                elif ln in (0, 8, 16):
                    nvalues = ln // 8
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                # Reject a record whose declared length runs past the data
                # actually present instead of leaking struct.error.
                payload = extra[4:4 + 8 * nvalues]
                if len(payload) != 8 * nvalues:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))
                counts = unpack('<%dQ' % nvalues, payload) if nvalues else ()

                idx = 0
                try:
                    # ZIP64 extension (large files and/or large archives)
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        self.file_size = counts[idx]
                        idx += 1

                    if self.compress_size == 0xFFFFFFFF:
                        self.compress_size = counts[idx]
                        idx += 1

                    if self.header_offset == 0xffffffff:
                        self.header_offset = counts[idx]
                        idx += 1
                except IndexError:
                    # The record holds fewer values than the header's
                    # placeholders call for.
                    raise RuntimeError(
                        "Corrupt extra field %s"%(ln,)) from None

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000467
468
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for byte in range(256):
            reg = byte
            for _ in range(8):
                low_bit = reg & 1
                reg = (reg >> 1) & 0x7FFFFFFF
                if low_bit:
                    reg ^= poly
            table.append(reg)
        return table
    # Built lazily by the first instance and shared by all instances.
    crctable = None

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ch) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        """Set up the three keys and mix in each item of *pwd*."""
        cls = _ZipDecrypter
        if cls.crctable is None:
            cls.crctable = cls._GenerateCRCTable()
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for item in pwd:
            self._UpdateKeys(item)

    def _UpdateKeys(self, c):
        """Advance the three keys using the plaintext value *c*."""
        self.key0 = self._crc32(c, self.key0)
        summed = (self.key1 + (self.key0 & 0xFF)) & 0xFFFFFFFF
        self.key1 = summed
        self.key1 = (summed * 0x08088405 + 1) & 0xFFFFFFFF
        top_byte = (self.key1 >> 24) & 0xFF
        self.key2 = self._crc32(top_byte, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        mixer = self.key2 | 2
        keystream = ((mixer * (mixer ^ 1)) >> 8) & 0xFF
        plain = c ^ keystream
        self._UpdateKeys(plain)
        return plain
528
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200529
class LZMACompressor:
    """Raw LZMA1 compressor whose first output is preceded by a header.

    The header is struct.pack('<BBH', 9, 4, len(props)) followed by the
    encoded filter properties; it is emitted by whichever of compress()
    or flush() is called first.
    """

    def __init__(self):
        # The underlying lzma compressor is created on first use.
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*, prefixing the header on the first call."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream, prefixing the header if nothing was written."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.flush()
551
552
class LZMADecompressor:
    """Raw LZMA1 decompressor for data that starts with a properties header.

    Input is buffered until the 4-byte header, the filter properties it
    announces, and at least one further byte have arrived; only then is
    the underlying lzma decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Return decompressed bytes for *data* (b'' while still buffering)."""
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            # Bytes 2-3 of the header hold the size of the properties blob.
            psize, = struct.unpack('<H', pending[2:4])
            header_end = 4 + psize
            if len(pending) <= header_end:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:header_end])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[header_end:]
            # The buffer is no longer needed once the decompressor exists.
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
579
580
# Maps ZIP compression method numbers to short names used in messages.
compressor_names = dict([
    (0, 'store'),
    (1, 'shrink'),
    (2, 'reduce'),
    (3, 'reduce'),
    (4, 'reduce'),
    (5, 'reduce'),
    (6, 'implode'),
    (7, 'tokenize'),
    (8, 'deflate'),
    (9, 'deflate64'),
    (10, 'implode'),
    (12, 'bzip2'),
    (14, 'lzma'),
    (18, 'terse'),
    (19, 'lz77'),
    (97, 'wavpack'),
    (98, 'ppmd'),
])
600
def _check_compression(compression):
    """Verify that *compression* is a method this module can use.

    Raises RuntimeError if the method is unknown, or if the module that
    implements it could not be imported.
    """
    if compression == ZIP_STORED:
        return
    backends = (
        (ZIP_DEFLATED, zlib,
         "Compression requires the (missing) zlib module"),
        (ZIP_BZIP2, bz2,
         "Compression requires the (missing) bz2 module"),
        (ZIP_LZMA, lzma,
         "Compression requires the (missing) lzma module"),
    )
    for method, module, message in backends:
        if compression == method:
            if not module:
                raise RuntimeError(message)
            return
    raise RuntimeError("That compression method is not supported")
618
619
def _get_compressor(compress_type):
    """Return a new compressor object for *compress_type*.

    None is returned for every type other than ZIP_DEFLATED, ZIP_BZIP2
    and ZIP_LZMA.
    """
    if compress_type == ZIP_DEFLATED:
        # A negative window size is passed through to zlib.compressobj.
        level = zlib.Z_DEFAULT_COMPRESSION
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    if compress_type == ZIP_LZMA:
        return LZMACompressor()
    return None
630
631
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    None is returned for ZIP_STORED.  NotImplementedError is raised for
    any type other than ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2 and ZIP_LZMA;
    the message includes the method's name when compressor_names has one.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    known_name = compressor_names.get(compress_type)
    if not known_name:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError("compression type %d (%s)" % (compress_type, known_name))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200647
648
class _SharedFile:
    """Reader over a file object that keeps its own read position.

    Every read() first seeks the underlying file to the position stored
    on this object, so the underlying file's position may be moved by
    other code between reads.
    """

    def __init__(self, file, pos, close):
        self._file = file
        self._pos = pos
        # Callable invoked with the file object when close() is called.
        self._close = close

    def read(self, n=-1):
        """Read up to *n* bytes from the stored position and advance it."""
        stream = self._file
        stream.seek(self._pos)
        chunk = stream.read(n)
        self._pos = stream.tell()
        return chunk

    def close(self):
        """Hand the file to the close callback; later calls do nothing."""
        fileobj, self._file = self._file, None
        if fileobj is not None:
            self._close(fileobj)
666
667
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.

    Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # Parenthesised on purpose: "-" binds tighter than "<<", so the bare
    # expression "1 << 31 - 1" evaluates to 1 << 30.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Set up a reader for one archive member.

        fileobj: object whose read() yields the member's raw data.
        mode: stored as self.mode; universal newlines are enabled when it
            contains 'U'.
        zipinfo: supplies compress_type, compress_size, file_size, filename
            and, when present, CRC.
        decrypter: optional callable applied to every raw byte read.
        close_fileobj: if true, close() also closes fileobj.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            # Without a reference value _update_crc() does nothing.
            self._expected_crc = None

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                # Whatever newline form was seen, the caller gets b'\n'.
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # read() consumed the bytes; put them back in front of whatever
            # is still buffered so the position is effectively unchanged.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # The request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        # Drain the buffer, then fetch the n bytes still missing.
        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # More came back than was asked for: keep the whole chunk
                # as the new buffer with the offset just past the part used.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n=-1):
        """Read up to n bytes with at most one read() system call.

        If n is omitted, None, or negative, the buffered bytes are returned
        together with the first non-empty chunk obtained from _read1().
        """

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            # Loop only past empty results; stop at the first chunk that
            # carries data.
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the member's declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read raw bytes from the underlying file: at least MIN_READ_SIZE
        # when available, never more than remain for this member; decrypt
        # them when a decrypter is set.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            # Compressed bytes were still expected but the file gave none.
            raise EOFError

        if self._decrypter is not None:
            data = bytes(map(self._decrypter, data))
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000933
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000934
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000935class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000936 """ Class with methods to open, read, write, close, list zip files.
937
Serhiy Storchaka235c5e02013-11-23 15:55:38 +0200938 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +0000939
Fred Drake3d9091e2001-03-26 15:49:24 +0000940 file: Either the path to the file, or a file-like object.
941 If it is a path, the file will be opened and closed by ZipFile.
942 mode: The mode can be either read "r", write "w" or append "a".
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200943 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
944 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000945 allowZip64: if True ZipFile will create files with ZIP64 extensions when
946 needed, otherwise it will raise an exception when this would
947 be necessary.
948
Fred Drake3d9091e2001-03-26 15:49:24 +0000949 """
Fred Drake484d7352000-10-02 21:14:52 +0000950
Fred Drake90eac282001-02-28 05:29:34 +0000951 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -0800952 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +0000953
Serhiy Storchaka235c5e02013-11-23 15:55:38 +0200954 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
Fred Drake484d7352000-10-02 21:14:52 +0000955 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000956 if mode not in ("r", "w", "a"):
957 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
958
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200959 _check_compression(compression)
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000960
961 self._allowZip64 = allowZip64
962 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000963 self.debug = 0 # Level of printing: 0 through 3
964 self.NameToInfo = {} # Find file info given name
965 self.filelist = [] # List of ZipInfo instances for archive
966 self.compression = compression # Method of compression
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200967 self.mode = mode
Thomas Wouterscf297e42007-02-23 15:07:44 +0000968 self.pwd = None
R David Murrayf50b38a2012-04-12 18:44:58 -0400969 self._comment = b''
Tim Petersa19a1682001-03-29 04:36:09 +0000970
Fred Drake3d9091e2001-03-26 15:49:24 +0000971 # Check if we were passed a file-like object
Guido van Rossum3172c5d2007-10-16 18:12:55 +0000972 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000973 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +0000974 self._filePassed = 0
975 self.filename = file
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200976 modeDict = {'r' : 'rb', 'w': 'w+b', 'a' : 'r+b',
977 'r+b': 'w+b', 'w+b': 'wb'}
978 filemode = modeDict[mode]
979 while True:
980 try:
981 self.fp = io.open(file, filemode)
982 except OSError:
983 if filemode in modeDict:
984 filemode = modeDict[filemode]
985 continue
Thomas Wouterscf297e42007-02-23 15:07:44 +0000986 raise
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200987 break
Fred Drake3d9091e2001-03-26 15:49:24 +0000988 else:
989 self._filePassed = 1
990 self.fp = file
991 self.filename = getattr(file, 'name', None)
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200992 self._fileRefCnt = 1
Tim Petersa19a1682001-03-29 04:36:09 +0000993
Antoine Pitrou17babc52012-11-17 23:50:08 +0100994 try:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200995 if mode == 'r':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000996 self._RealGetContents()
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200997 elif mode == 'w':
Georg Brandl268e4d42010-10-14 06:59:45 +0000998 # set the modified flag so central directory gets written
999 # even if no files are added to the archive
1000 self._didModify = True
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001001 self.start_dir = 0
1002 elif mode == 'a':
Antoine Pitrou17babc52012-11-17 23:50:08 +01001003 try:
1004 # See if file is a zip file
1005 self._RealGetContents()
1006 # seek to start of directory and overwrite
1007 self.fp.seek(self.start_dir, 0)
1008 except BadZipFile:
1009 # file is not a zip file, just append
1010 self.fp.seek(0, 2)
1011
1012 # set the modified flag so central directory gets written
1013 # even if no files are added to the archive
1014 self._didModify = True
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001015 self.start_dir = self.fp.tell()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001016 else:
1017 raise RuntimeError('Mode must be "r", "w" or "a"')
1018 except:
1019 fp = self.fp
1020 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001021 self._fpclose(fp)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001022 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001023
Ezio Melottifaa6b7f2009-12-30 12:34:59 +00001024 def __enter__(self):
1025 return self
1026
1027 def __exit__(self, type, value, traceback):
1028 self.close()
1029
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001030 def __repr__(self):
1031 result = ['<%s.%s' % (self.__class__.__module__,
1032 self.__class__.__qualname__)]
1033 if self.fp is not None:
1034 if self._filePassed:
1035 result.append(' file=%r' % self.fp)
1036 elif self.filename is not None:
1037 result.append(' filename=%r' % self.filename)
1038 result.append(' mode=%r' % self.mode)
1039 else:
1040 result.append(' [closed]')
1041 result.append('>')
1042 return ''.join(result)
1043
Tim Peters7d3bad62001-04-04 18:56:49 +00001044 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +00001045 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001046 fp = self.fp
Georg Brandl268e4d42010-10-14 06:59:45 +00001047 try:
1048 endrec = _EndRecData(fp)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001049 except OSError:
Georg Brandl4d540882010-10-28 06:42:33 +00001050 raise BadZipFile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001051 if not endrec:
Georg Brandl4d540882010-10-28 06:42:33 +00001052 raise BadZipFile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001053 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001054 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001055 size_cd = endrec[_ECD_SIZE] # bytes in central directory
1056 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
R David Murrayf50b38a2012-04-12 18:44:58 -04001057 self._comment = endrec[_ECD_COMMENT] # archive comment
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001058
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001059 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001060 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +00001061 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1062 # If Zip64 extension structures are present, account for them
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001063 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
1064
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001065 if self.debug > 2:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001066 inferred = concat + offset_cd
1067 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001068 # self.start_dir: Position of start of central directory
1069 self.start_dir = offset_cd + concat
1070 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001071 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001072 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001073 total = 0
1074 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001075 centdir = fp.read(sizeCentralDir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001076 if len(centdir) != sizeCentralDir:
1077 raise BadZipFile("Truncated central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001078 centdir = struct.unpack(structCentralDir, centdir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001079 if centdir[_CD_SIGNATURE] != stringCentralDir:
1080 raise BadZipFile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001081 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001082 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +00001083 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001084 flags = centdir[5]
1085 if flags & 0x800:
1086 # UTF-8 file names extension
1087 filename = filename.decode('utf-8')
1088 else:
1089 # Historical ZIP filename encoding
1090 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001091 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001092 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +00001093 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1094 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001095 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001096 (x.create_version, x.create_system, x.extract_version, x.reserved,
Christian Tismer59202e52013-10-21 03:59:23 +02001097 x.flag_bits, x.compress_type, t, d,
1098 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
Martin v. Löwisd099b562012-05-01 14:08:22 +02001099 if x.extract_version > MAX_EXTRACT_VERSION:
1100 raise NotImplementedError("zip file version %.1f" %
1101 (x.extract_version / 10))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001102 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1103 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +00001104 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001105 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Christian Tismer59202e52013-10-21 03:59:23 +02001106 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001107
1108 x._decodeExtra()
1109 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001110 self.filelist.append(x)
1111 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001112
1113 # update total bytes read from central directory
1114 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1115 + centdir[_CD_EXTRA_FIELD_LENGTH]
1116 + centdir[_CD_COMMENT_LENGTH])
1117
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001118 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001119 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001120
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001121
1122 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001123 """Return a list of file names in the archive."""
Ezio Melotti006917e2012-04-16 21:34:24 -06001124 return [data.filename for data in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001125
1126 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001127 """Return a list of class ZipInfo instances for files in the
1128 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001129 return self.filelist
1130
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001131 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +00001132 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001133 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
1134 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001135 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001136 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001137 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1138 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001139
1140 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001141 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001142 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001143 for zinfo in self.filelist:
1144 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001145 # Read by chunks, to avoid an OverflowError or a
1146 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001147 with self.open(zinfo.filename, "r") as f:
1148 while f.read(chunk_size): # Check CRC-32
1149 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001150 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001151 return zinfo.filename
1152
1153 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +00001154 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001155 info = self.NameToInfo.get(name)
1156 if info is None:
1157 raise KeyError(
1158 'There is no item named %r in the archive' % name)
1159
1160 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001161
Thomas Wouterscf297e42007-02-23 15:07:44 +00001162 def setpassword(self, pwd):
1163 """Set default password for encrypted files."""
R. David Murray8d855d82010-12-21 21:53:37 +00001164 if pwd and not isinstance(pwd, bytes):
1165 raise TypeError("pwd: expected bytes, got %s" % type(pwd))
1166 if pwd:
1167 self.pwd = pwd
1168 else:
1169 self.pwd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001170
R David Murrayf50b38a2012-04-12 18:44:58 -04001171 @property
1172 def comment(self):
1173 """The comment text associated with the ZIP file."""
1174 return self._comment
1175
1176 @comment.setter
1177 def comment(self, comment):
1178 if not isinstance(comment, bytes):
1179 raise TypeError("comment: expected bytes, got %s" % type(comment))
1180 # check for valid comment length
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001181 if len(comment) > ZIP_MAX_COMMENT:
1182 import warnings
1183 warnings.warn('Archive comment is too long; truncating to %d bytes'
1184 % ZIP_MAX_COMMENT, stacklevel=2)
R David Murrayf50b38a2012-04-12 18:44:58 -04001185 comment = comment[:ZIP_MAX_COMMENT]
1186 self._comment = comment
1187 self._didModify = True
1188
Thomas Wouterscf297e42007-02-23 15:07:44 +00001189 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +00001190 """Return file bytes (as a string) for name."""
Benjamin Petersond285bdb2010-10-31 17:57:22 +00001191 with self.open(name, "r", pwd) as fp:
1192 return fp.read()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001193
1194 def open(self, name, mode="r", pwd=None):
1195 """Return file-like object for 'name'."""
1196 if mode not in ("r", "U", "rU"):
Collin Winterce36ad82007-08-30 01:19:48 +00001197 raise RuntimeError('open() requires mode "r", "U", or "rU"')
Serhiy Storchaka6787a382013-11-23 22:12:06 +02001198 if 'U' in mode:
1199 import warnings
1200 warnings.warn("'U' mode is deprecated",
1201 DeprecationWarning, 2)
R. David Murray8d855d82010-12-21 21:53:37 +00001202 if pwd and not isinstance(pwd, bytes):
1203 raise TypeError("pwd: expected bytes, got %s" % type(pwd))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001204 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001205 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02001206 "Attempt to read ZIP archive that was already closed")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001207
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001208 # Make sure we have an info object
1209 if isinstance(name, ZipInfo):
1210 # 'name' is already an info object
1211 zinfo = name
Guido van Rossumd8faa362007-04-27 19:54:29 +00001212 else:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001213 # Get info object for name
1214 zinfo = self.getinfo(name)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001215
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001216 self._fileRefCnt += 1
1217 zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001218 try:
Antoine Pitrou17babc52012-11-17 23:50:08 +01001219 # Skip the file header:
1220 fheader = zef_file.read(sizeFileHeader)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001221 if len(fheader) != sizeFileHeader:
1222 raise BadZipFile("Truncated file header")
1223 fheader = struct.unpack(structFileHeader, fheader)
1224 if fheader[_FH_SIGNATURE] != stringFileHeader:
Antoine Pitrou17babc52012-11-17 23:50:08 +01001225 raise BadZipFile("Bad magic number for file header")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001226
Antoine Pitrou17babc52012-11-17 23:50:08 +01001227 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
1228 if fheader[_FH_EXTRA_FIELD_LENGTH]:
1229 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001230
Antoine Pitrou8572da52012-11-17 23:52:05 +01001231 if zinfo.flag_bits & 0x20:
1232 # Zip 2.7: compressed patched data
1233 raise NotImplementedError("compressed patched data (flag bit 5)")
Martin v. Löwis2a2ce322012-05-01 08:44:08 +02001234
Antoine Pitrou8572da52012-11-17 23:52:05 +01001235 if zinfo.flag_bits & 0x40:
1236 # strong encryption
1237 raise NotImplementedError("strong encryption (flag bit 6)")
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001238
Antoine Pitrou17babc52012-11-17 23:50:08 +01001239 if zinfo.flag_bits & 0x800:
1240 # UTF-8 filename
1241 fname_str = fname.decode("utf-8")
1242 else:
1243 fname_str = fname.decode("cp437")
Georg Brandl5ba11de2011-01-01 10:09:32 +00001244
Antoine Pitrou17babc52012-11-17 23:50:08 +01001245 if fname_str != zinfo.orig_filename:
1246 raise BadZipFile(
1247 'File name in directory %r and header %r differ.'
1248 % (zinfo.orig_filename, fname))
1249
1250 # check for encrypted flag & handle password
1251 is_encrypted = zinfo.flag_bits & 0x1
1252 zd = None
1253 if is_encrypted:
1254 if not pwd:
1255 pwd = self.pwd
1256 if not pwd:
1257 raise RuntimeError("File %s is encrypted, password "
1258 "required for extraction" % name)
1259
1260 zd = _ZipDecrypter(pwd)
1261 # The first 12 bytes in the cypher stream is an encryption header
1262 # used to strengthen the algorithm. The first 11 bytes are
1263 # completely random, while the 12th contains the MSB of the CRC,
1264 # or the MSB of the file time depending on the header type
1265 # and is used to check the correctness of the password.
1266 header = zef_file.read(12)
1267 h = list(map(zd, header[0:12]))
1268 if zinfo.flag_bits & 0x8:
1269 # compare against the file type from extended local headers
1270 check_byte = (zinfo._raw_time >> 8) & 0xff
1271 else:
1272 # compare against the CRC otherwise
1273 check_byte = (zinfo.CRC >> 24) & 0xff
1274 if h[11] != check_byte:
1275 raise RuntimeError("Bad password for file", name)
1276
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001277 return ZipExtFile(zef_file, mode, zinfo, zd, True)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001278 except:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001279 zef_file.close()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001280 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001281
Christian Heimes790c8232008-01-07 21:14:23 +00001282 def extract(self, member, path=None, pwd=None):
1283 """Extract a member from the archive to the current working directory,
1284 using its full name. Its file information is extracted as accurately
1285 as possible. `member' may be a filename or a ZipInfo object. You can
1286 specify a different directory using `path'.
1287 """
1288 if not isinstance(member, ZipInfo):
1289 member = self.getinfo(member)
1290
1291 if path is None:
1292 path = os.getcwd()
1293
1294 return self._extract_member(member, path, pwd)
1295
1296 def extractall(self, path=None, members=None, pwd=None):
1297 """Extract all members from the archive to the current working
1298 directory. `path' specifies a different directory to extract to.
1299 `members' is optional and must be a subset of the list returned
1300 by namelist().
1301 """
1302 if members is None:
1303 members = self.namelist()
1304
1305 for zipinfo in members:
1306 self.extract(zipinfo, path, pwd)
1307
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001308 @classmethod
1309 def _sanitize_windows_name(cls, arcname, pathsep):
1310 """Replace bad characters and remove trailing dots from parts."""
1311 table = cls._windows_illegal_name_trans_table
1312 if not table:
1313 illegal = ':<>|"?*'
1314 table = str.maketrans(illegal, '_' * len(illegal))
1315 cls._windows_illegal_name_trans_table = table
1316 arcname = arcname.translate(table)
1317 # remove trailing dots
1318 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1319 # rejoin, removing empty parts.
1320 arcname = pathsep.join(x for x in arcname if x)
1321 return arcname
1322
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
    file on the path targetpath.

    Returns the normalized path that was written (or, for a member whose
    name ends with '/', the directory that was created).
    """
    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    invalid_path_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                               if x not in invalid_path_parts)
    if os.path.sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, os.path.sep)

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    # endswith() rather than filename[-1] so that an empty member name
    # does not raise IndexError here.
    if member.filename.endswith('/'):
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    with self.open(member, pwd=pwd) as source, \
         open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1361
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Warns when zinfo.filename is already present in the archive, raises
    RuntimeError when the archive is not open in mode "w" or "a" or has
    been closed, validates the compression type, and raises LargeZipFile
    when ZIP64 is not allowed but one of the ZIP64 limits is exceeded.
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)
    if not self._allowZip64:
        # Name the first limit that would be exceeded, if any.
        requires_zip64 = None
        if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
            requires_zip64 = "Files count"
        elif zinfo.file_size > ZIP64_LIMIT:
            requires_zip64 = "Filesize"
        elif zinfo.header_offset > ZIP64_LIMIT:
            requires_zip64 = "Zipfile size"
        if requires_zip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001384
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    When arcname is None the name is derived from filename; in both cases
    the drive is removed, the path is normalized and leading separators
    are stripped.  compress_type overrides the archive's default
    compression for this entry.  Raises RuntimeError if the archive has
    already been closed.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    # New entries are written at start_dir, the position recorded after
    # the previous entry.
    self.fp.seek(self.start_dir, 0)
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        zinfo.flag_bits |= 0x02

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        zinfo.external_attr |= 0x10  # MS-DOS directory flag
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        self.start_dir = self.fp.tell()
        return

    cmpr = _get_compressor(zinfo.compress_type)
    with open(filename, "rb") as fp:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
            zinfo.file_size * 1.05 > ZIP64_LIMIT
        self.fp.write(zinfo.FileHeader(zip64))
        file_size = 0
        # Read fixed-size chunks until read() returns an empty bytes object.
        for buf in iter(lambda: fp.read(1024 * 8), b""):
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    if not zip64 and self._allowZip64:
        # The header was written without ZIP64 fields, so the final
        # sizes must still fit.
        if file_size > ZIP64_LIMIT:
            raise RuntimeError('File size has increased during compressing')
        if compress_size > ZIP64_LIMIT:
            raise RuntimeError('Compressed size larger than uncompressed size')
    # Seek backwards and write file header (which will now include
    # correct CRC and file sizes)
    self.start_dir = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset, 0)
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.seek(self.start_dir, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1475
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.
    'compress_type', when not None, overrides the entry's compression.
    Raises RuntimeError if the archive has already been closed and
    LargeZipFile if the entry needs ZIP64 but ZIP64 is not allowed."""
    # Check for a closed archive before doing any other work, as
    # write() does.
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        # endswith() rather than filename[-1] so that an empty name
        # does not raise IndexError here.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    zinfo.file_size = len(data)            # Uncompressed size
    self.fp.seek(self.start_dir, 0)
    zinfo.header_offset = self.fp.tell()    # Start of header data
    if compress_type is not None:
        zinfo.compress_type = compress_type
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        zinfo.flag_bits |= 0x02

    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff       # CRC-32 checksum
    co = _get_compressor(zinfo.compress_type)
    if co:
        data = co.compress(data) + co.flush()
        zinfo.compress_size = len(data)    # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    zip64 = zinfo.file_size > ZIP64_LIMIT or \
        zinfo.compress_size > ZIP64_LIMIT
    if zip64 and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.write(data)
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        fmt = '<LQQ' if zip64 else '<LLL'
        self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    self.fp.flush()
    self.start_dir = self.fp.tell()
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1533
def __del__(self):
    """Call the "close()" method in case the user forgot."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001537
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing when the archive is already closed.  The ending records
    (central directory, optional ZIP64 records, end-of-archive record and
    comment) are only written when the archive was modified.  The
    underlying file is released through _fpclose() even if writing the
    records raises.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:  # write ending records
            self.fp.seek(self.start_dir, 0)
            for zinfo in self.filelist:         # write central directory
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                # Values collected in `extra' go into a ZIP64 extra
                # field; the fixed-width slots then hold 0xffffffff.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                   or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                min_version = 0
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    min_version = ZIP64_VERSION

                if zinfo.compress_type == ZIP_BZIP2:
                    min_version = max(BZIP2_VERSION, min_version)
                elif zinfo.compress_type == ZIP_LZMA:
                    min_version = max(LZMA_VERSION, min_version)

                extract_version = max(min_version, zinfo.extract_version)
                create_version = max(min_version, zinfo.create_version)
                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                                          stringCentralDir, create_version,
                                          zinfo.create_system, extract_version, zinfo.reserved,
                                          flag_bits, zinfo.compress_type, dostime, dosdate,
                                          zinfo.CRC, compress_size, file_size,
                                          len(filename), len(extra_data), len(zinfo.comment),
                                          0, zinfo.internal_attr, zinfo.external_attr,
                                          header_offset)
                except DeprecationWarning:
                    # Dump the values being packed to stderr, then re-raise.
                    print((structCentralDir, stringCentralDir, create_version,
                           zinfo.create_system, extract_version, zinfo.reserved,
                           zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                           zinfo.CRC, compress_size, file_size,
                           len(zinfo.filename), len(extra_data), len(zinfo.comment),
                           0, zinfo.internal_attr, zinfo.external_attr,
                           header_offset), file=sys.stderr)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = len(self.filelist)
            centDirSize = pos2 - self.start_dir
            centDirOffset = self.start_dir
            requires_zip64 = None
            if centDirCount > ZIP_FILECOUNT_LIMIT:
                requires_zip64 = "Files count"
            elif centDirOffset > ZIP64_LIMIT:
                requires_zip64 = "Central directory offset"
            elif centDirSize > ZIP64_LIMIT:
                requires_zip64 = "Central directory size"
            if requires_zip64:
                # Need to write the ZIP64 end-of-archive records
                if not self._allowZip64:
                    raise LargeZipFile(requires_zip64 +
                                       " would require ZIP64 extensions")
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # Clamp to the widths of the classic end-of-archive record.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self._comment))
            self.fp.write(endrec)
            self.fp.write(self._comment)
            self.fp.flush()
    finally:
        # Mark the archive closed before releasing the file object.
        fp = self.fp
        self.fp = None
        self._fpclose(fp)
1650
def _fpclose(self, fp):
    """Drop one reference to the shared file object `fp'.

    `fp' is closed only when the reference count reaches zero and
    self._filePassed is false.
    """
    assert self._fileRefCnt > 0
    self._fileRefCnt -= 1
    if not self._fileRefCnt and not self._filePassed:
        fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001656
1657
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        # -1 selects the legacy "use whatever compiled file is present"
        # behaviour in _get_codename(); any other value is passed to
        # py_compile as the optimization level.
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.
        If filterfunc is given, it is called with each path before that
        path is added.  When it returns a false value, the file or
        directory is skipped.
        """
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s "%s" skipped by filterfunc' % (label, pathname))
            return
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename,
                                         filterfunc=filterfunc)  # Recursive call
                    elif ext == ".py":
                        self._write_filtered_module(path, basename,
                                                    filterfunc)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        self._write_filtered_module(path, basename,
                                                    filterfunc)
        else:
            if not pathname.endswith(".py"):
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _write_filtered_module(self, path, basename, filterfunc):
        """Add the *.py file `path' under `basename' unless filterfunc
        rejects it."""
        if filterfunc and not filterfunc(path):
            if self.debug:
                print('file "%s" skipped by filterfunc' % path)
            return
        fname, arcname = self._get_codename(path[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, the archive name is string.pyc (or
        string.pyo, or string.py when compilation fails), prefixed with
        "basename/" when basename is non-empty; the file path is the
        matching file on disk, which may live in __pycache__.
        """
        def _compile(file, optimize=-1):
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = importlib.util.cache_from_source(file_py, True)
        pycache_pyo = importlib.util.cache_from_source(file_py, False)

        def _up_to_date(candidate):
            # A compiled file is usable when it exists and is at least
            # as new as the source.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present
            if _up_to_date(file_pyo):
                # Use .pyo file.
                arcname = fname = file_pyo
            elif _up_to_date(file_pyc):
                # Use .pyc file.
                arcname = fname = file_pyc
            elif _up_to_date(pycache_pyc):
                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
                # file name in the archive.
                fname = pycache_pyc
                arcname = file_pyc
            elif _up_to_date(pycache_pyo):
                # Use the __pycache__/*.pyo file, but write it to the legacy pyo
                # file name in the archive.
                fname = pycache_pyo
                arcname = file_pyo
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    fname = (pycache_pyc if __debug__ else pycache_pyo)
                    arcname = (file_pyc if __debug__ else file_pyo)
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_pyc
                arcname = file_pyc
            else:
                fname = pycache_pyo
                arcname = file_pyo
            if not _up_to_date(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001816
1817
def main(args=None):
    """Command-line entry point: list, test, extract or create a zipfile.

    `args' defaults to sys.argv[1:].  On a missing or unknown option, or a
    wrong number of arguments for the chosen option, the usage text is
    printed and the process exits with status 1.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    def usage_error():
        # Shared by every argument-validation failure below.
        print(USAGE)
        sys.exit(1)

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_error()

    if args[0] == '-l':
        if len(args) != 2:
            usage_error()
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif args[0] == '-t':
        if len(args) != 2:
            usage_error()
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            usage_error()

        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif args[0] == '-c':
        if len(args) < 3:
            usage_error()

        def addToZip(zf, path, zippath):
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                # An empty zippath means "archive root": no entry is
                # written for the directory itself.
                if zippath:
                    zf.write(path, zippath)
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w') as zf:
            for path in args[2:]:
                zippath = os.path.basename(path)
                if not zippath:
                    # path ended with a separator; use the last real component
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                addToZip(zf, path, zippath)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001883
# Run the command-line interface when executed as a script.
if __name__ == "__main__":
    main()