blob: 845c6a96fc61bef97c28c274522303f560933122 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Brett Cannonb57a0852013-06-15 17:32:30 -04009import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +000010import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
Serhiy Storchakaf15e5242015-01-26 13:53:38 +020016import threading
Barry Warsaw28a691b2010-04-17 00:19:56 +000017
Guido van Rossum32abe6f2000-03-31 17:30:02 +000018
19try:
Tim Peterse1190062001-01-15 03:34:38 +000020 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000021 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040022except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000023 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000024 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000025
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020026try:
27 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040028except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020029 bz2 = None
30
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020031try:
32 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040033except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020034 lzma = None
35
# Public names exported by a star-import of this module.  "BadZipfile" and
# "error" are older spellings kept as aliases of BadZipFile.
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000039
class BadZipFile(Exception):
    """Raised for ZIP archives this module cannot process.

    For example, _EndRecData64() raises it for archives that span
    multiple disks.
    """
    pass
Thomas Wouters0e3f5912006-08-11 14:57:12 +000042
43
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.

    ZipInfo.FileHeader() raises it when a size exceeds ZIP64_LIMIT while
    its zip64 argument is false.
    """
49
error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names


# Sizes above this value make ZipInfo.FileHeader() require the ZIP64
# extension (largest signed 32-bit value).
ZIP64_LIMIT = (1 << 31) - 1
# Largest value that fits an unsigned 16-bit field (65535).
# NOTE(review): presumably the entry-count limit without ZIP64 -- the code
# using it is outside this part of the file.
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
# Largest value that fits an unsigned 16-bit field (65535); the archive
# comment length in the end-of-central-directory record is such a field.
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "Version needed to extract" values; ZipInfo.FileHeader() raises the
# entry's extract/create versions to the highest one its features need.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020070
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# Layout (little-endian, no padding): 4-byte signature, four 16-bit fields,
# two 32-bit fields, one 16-bit field -- 8 fields, 22 bytes in total.
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields in the unpacked record (see _EndRecData()).
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData() appends the comment bytes and the record's file offset).
_ECD_COMMENT = 8
_ECD_LOCATION = 9
95
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
# Layout (little-endian, no padding): 4-byte signature, four bytes, four
# 16-bit fields, one 32-bit field, two 32-bit fields, five 16-bit fields and
# two 32-bit fields -- 19 fields, 46 bytes in total.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
122
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# Layout (little-endian, no padding): 4-byte signature, two bytes, four
# 16-bit fields, three 32-bit fields and two 16-bit fields -- 12 fields,
# 30 bytes in total.  ZipInfo.FileHeader() packs a header with this format.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields in the unpacked local file header.
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
141
# The "Zip64 end of central directory locator" structure, magic number, and size
# Layout (little-endian, no padding): 4-byte signature, 32-bit field,
# 64-bit field, 32-bit field -- 4 fields, 20 bytes in total.
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# Layout (little-endian, no padding): 4-byte signature, one 64-bit field,
# two 16-bit fields, two 32-bit fields and four 64-bit fields -- 10 fields,
# 56 bytes in total.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields in the unpacked ZIP64 end-of-central-directory record.
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
163
def _check_zipfile(fp):
    """Return True if *fp* holds an "end of central directory" record.

    An OSError raised while probing the file is treated as "not a ZIP
    file" and yields False.
    """
    try:
        end_record = _EndRecData(fp)
    except OSError:
        return False
    # A record was located, so the file carries the correct magic number.
    return bool(end_record)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000171
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    *filename* may be a path, or an object with a ``read`` attribute, which
    is then probed directly.  Returns False when the file cannot be opened
    or read.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and close it again when done.
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except OSError:
        # Keep whatever verdict was reached before the error.
        pass
    return verdict
187
def _EndRecData64(fpin, offset, endrec):
    """Merge the ZIP64 end-of-archive records, if present, into *endrec*.

    *offset* is the position of the classic end-of-central-directory record
    relative to the end of the file (so it is negative).  *endrec* is the
    list built by _EndRecData(); it is updated in place and returned.  It is
    returned unchanged when no valid ZIP64 locator/record pair is found.

    Raises BadZipFile for archives that span multiple disks.
    """
    locator_offset = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_offset, 2)
    except OSError:
        # The file is too small to hold a ZIP64 locator in front of the
        # classic record, so there is nothing to merge.
        return endrec

    raw = fpin.read(sizeEndCentDir64Locator)
    if len(raw) != sizeEndCentDir64Locator:
        return endrec
    magic, first_disk, _rel_offset, disk_total = struct.unpack(
        structEndArchive64Locator, raw)
    if magic != stringEndArchive64Locator:
        return endrec

    if not (first_disk == 0 and disk_total == 1):
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # The ZIP64 record is expected directly in front of its locator, i.e.
    # without any 'zip64 extensible data' in between.
    fpin.seek(locator_offset - sizeEndCentDir64, 2)
    raw = fpin.read(sizeEndCentDir64)
    if len(raw) != sizeEndCentDir64:
        return endrec
    (magic, _record_size, _made_by, _needed, disk_num, disk_dir,
     entries_here, entries_total, dir_size, dir_offset) = struct.unpack(
        structEndArchive64, raw)
    if magic != stringEndArchive64:
        return endrec

    # Overwrite the classic fields with their ZIP64 counterparts.
    for slot, value in (
            (_ECD_SIGNATURE, magic),
            (_ECD_DISK_NUMBER, disk_num),
            (_ECD_DISK_START, disk_dir),
            (_ECD_ENTRIES_THIS_DISK, entries_here),
            (_ECD_ENTRIES_TOTAL, entries_total),
            (_ECD_SIZE, dir_size),
            (_ECD_OFFSET, dir_offset)):
        endrec[slot] = value
    return endrec
229
230
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list: the eight unpacked fields of the record, then the
    archive comment (bytes), then the file offset at which the record
    starts.  The list is passed through _EndRecData64() so ZIP64 values
    replace the classic ones when present.
    """
    # Find out how long the file is.
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: an archive without a comment ends with the record itself.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    if (len(tail) == sizeEndCentDir and
            tail[0:4] == stringEndArchive and
            tail[-2:] == b"\000\000"):
        # Signature matches and the comment length is zero: unpack, then
        # add the empty comment and the record's start offset.
        endrec = list(struct.unpack(structEndArchive, tail))
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Slow path: either this is no ZIP file, or a comment follows the
    # record.  The comment is at most 64K long, so only the end of the file
    # needs searching; the magic number is assumed not to occur inside the
    # comment itself.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    tail = fpin.read()
    found = tail.rfind(stringEndArchive)
    if found < 0:
        # No end-of-central-directory signature anywhere in range.
        return None

    record_end = found + sizeEndCentDir
    packed = tail[found:record_end]
    if len(packed) != sizeEndCentDir:
        # The signature sits too close to the end of the file: corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, packed))
    # The comment size is whatever the archive claims it to be.
    claimed = endrec[_ECD_COMMENT_SIZE]
    endrec.append(tail[record_end:record_end + claimed])
    endrec.append(maxCommentStart + found)

    # Look for the ZIP64 records in front of the classic one.
    return _EndRecData64(fpin, maxCommentStart + found - filesize,
                         endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000290
Fred Drake484d7352000-10-02 21:14:52 +0000291
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename  -- member name; it is cut at the first null byte and
                     os.sep is replaced by "/".  The unmodified value is
                     kept in ``orig_filename``.
        date_time -- sequence (year, month, day, hour, min, sec).

        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Return a short description; fields at their trivial value are omitted."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The upper 16 bits of external_attr are shown as a file mode, the
        # lower 16 bits as a raw hexadecimal value.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.filename[-1:] == '/'
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as bytes.

        zip64 -- True to always append a ZIP64 extra record, False to
                 forbid it, None (default) to decide from the sizes.

        Side effect: ``extract_version`` and ``create_version`` are raised
        to the minimum version required by ZIP64 / bzip2 / LZMA.

        Raises LargeZipFile if a size exceeds ZIP64_LIMIT while *zip64* is
        false.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # ZIP64 extra record: tag 1, payload length, then both sizes
            # as 64-bit values.
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        ASCII is used when the name allows it; otherwise the name is encoded
        as UTF-8 and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 records found in ``self.extra``.

        For every record with tag 1, each of ``file_size``, ``compress_size``
        and ``header_offset`` that currently holds its 32-bit "see ZIP64"
        sentinel is replaced, in that order, by the next 64-bit value of the
        record.  Records with other tags are skipped.

        Raises RuntimeError if a tag-1 record has an unexpected length, is
        truncated, or holds fewer values than the sentinels require.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                # Number of 64-bit values announced by the record length.
                if ln >= 24:
                    nfields = 3
                elif ln in (0, 8, 16):
                    nfields = ln // 8
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))
                try:
                    counts = unpack('<%dQ' % nfields,
                                    extra[4:4 + 8 * nfields])
                except struct.error:
                    # The record claims more data than is actually there.
                    raise RuntimeError(
                        "Corrupt extra field %s"%(ln,)) from None

                idx = 0
                try:
                    # ZIP64 extension (large files and/or large archives)
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        self.file_size = counts[idx]
                        idx += 1

                    if self.compress_size == 0xFFFFFFFF:
                        self.compress_size = counts[idx]
                        idx += 1

                    if self.header_offset == 0xffffffff:
                        self.header_offset = counts[idx]
                        idx += 1
                except IndexError:
                    # More sentinels than values stored in the record.
                    raise RuntimeError(
                        "Corrupt extra field %s"%(ln,)) from None

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000468
469
class _ZipDecrypter:
    """Decrypter for the password-based encryption of ZIP archive members.

    Known plaintext attacks exist against this scheme, but being able to
    read such files is still useful.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)

    The password and the cipher text are iterated as integers (bytes).
    """

    def _GenerateCRCTable():
        """Build the 256-entry CRC-32 lookup table.

        The cipher scrambles its keys with the one-byte CRC-32 primitive;
        a direct table implementation was found to be faster than relying
        on binascii.crc32().
        """
        polynomial = 0xedb88320
        entries = []
        for value in range(256):
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= polynomial
            entries.append(value)
        return entries
    # Filled in lazily by the first instance that gets created.
    crctable = None

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        slot = (crc ^ ch) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[slot]

    def __init__(self, pwd):
        """Initialise the three keys and mix in every byte of *pwd*."""
        cls = _ZipDecrypter
        if cls.crctable is None:
            cls.crctable = cls._GenerateCRCTable()
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for byte in pwd:
            self._UpdateKeys(byte)

    def _UpdateKeys(self, c):
        """Advance the three keys with the plain-text byte *c*."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        self.key1 = (self.key1 + (self.key0 & 0xFF)) & mask32
        self.key1 = (self.key1 * 0x08088405 + 1) & mask32
        self.key2 = self._crc32((self.key1 >> 24) & 0xFF, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        keystream = ((k * (k ^ 1)) >> 8) & 0xFF
        plain = c ^ keystream
        # The keys advance on the decrypted byte, not the encrypted one.
        self._UpdateKeys(plain)
        return plain
529
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200530
class LZMACompressor:
    """Compressor producing a raw LZMA1 stream preceded by a small header.

    The header is emitted together with the first output: the two bytes
    9 and 4, the 16-bit little-endian length of the encoded filter
    properties, and the properties themselves.
    """

    def __init__(self):
        # The real compressor is created lazily, on first use.
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        encoded = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1,
                                                     encoded)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        return struct.pack('<BBH', 9, 4, len(encoded)) + encoded

    def compress(self, data):
        """Compress *data*; the header is prepended on the first call."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream; emits the header too if nothing was written yet."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.flush()
552
553
class LZMADecompressor:
    """Decompressor for the header + raw LZMA1 stream written by LZMACompressor.

    Input is buffered until the 4-byte header and the filter properties it
    announces have arrived with at least one more byte; only then is the
    real decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Return the decompressed bytes available after feeding *data*.

        Returns b'' while the header is still incomplete.
        """
        if self._decomp is None:
            pending = self._unconsumed + data
            self._unconsumed = pending
            if len(pending) <= 4:
                return b''
            # Bytes 2-3 hold the little-endian size of the properties.
            header_end = 4 + int.from_bytes(pending[2:4], 'little')
            if len(pending) <= header_end:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:header_end])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[header_end:]
            # The header buffer is never needed again.
            del self._unconsumed

        output = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return output
580
581
# Human-readable names of ZIP compression method numbers.  Used by
# ZipInfo.__repr__() and in the error message of _get_decompressor(); it
# also names methods that this module cannot handle.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
601
def _check_compression(compression):
    """Verify that the compression method *compression* can be used.

    Returns None on success.  Raises RuntimeError if the method is unknown
    or if the module implementing it could not be imported.
    """
    if compression == ZIP_STORED:
        # Storing needs no helper module.
        return
    requirements = (
        (ZIP_DEFLATED, zlib,
         "Compression requires the (missing) zlib module"),
        (ZIP_BZIP2, bz2,
         "Compression requires the (missing) bz2 module"),
        (ZIP_LZMA, lzma,
         "Compression requires the (missing) lzma module"),
    )
    for method, module, complaint in requirements:
        if compression == method:
            if not module:
                raise RuntimeError(complaint)
            return
    raise RuntimeError("That compression method is not supported")
619
620
def _get_compressor(compress_type):
    """Return a new compressor object for *compress_type*.

    Returns None for any method other than deflate, bzip2 and LZMA.
    """
    if compress_type == ZIP_DEFLATED:
        # Negative window bits: no zlib header or trailer is written.
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    if compress_type == ZIP_LZMA:
        return LZMACompressor()
    return None
631
632
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    Returns None for stored (uncompressed) members.  Raises
    NotImplementedError for any other method that is not supported; the
    message includes the method's name when compressor_names knows it.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # Negative window bits: the data has no zlib header or trailer.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    label = compressor_names.get(compress_type)
    if not label:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError("compression type %d (%s)" % (compress_type, label))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200648
649
class _SharedFile:
    """Reader over a shared file object that keeps its own read position.

    Every read seeks the underlying file to this reader's position first,
    all under *lock*.
    """

    def __init__(self, file, pos, close, lock):
        """Remember the shared *file*, the start position *pos*, the
        *close* callback (called with the file object) and the *lock*."""
        self._file, self._pos = file, pos
        self._close, self._lock = close, lock

    def read(self, n=-1):
        """Read up to *n* bytes from this reader's own position."""
        with self._lock:
            shared = self._file
            shared.seek(self._pos)
            chunk = shared.read(n)
            # Remember where this reader stopped.
            self._pos = shared.tell()
            return chunk

    def close(self):
        """Hand the file to the close callback; later calls do nothing."""
        fileobj, self._file = self._file, None
        if fileobj is not None:
            self._close(fileobj)
669
670
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.

    Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses are required:
    # "-" binds more tightly than "<<", so "1 << 31 - 1" is 1 << 30.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Set up a reader for one archive member.

        fileobj       -- object whose read() yields the member's raw bytes
        mode          -- open mode; a 'U' in it enables universal newlines
        zipinfo       -- supplies compress_type, compress_size, file_size,
                         filename and (optionally) CRC
        decrypter     -- optional callable applied to every raw byte
        close_fileobj -- if true, close() also closes fileobj
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        # Raw (compressed) bytes still to be read from fileobj.
        self._compress_left = zipinfo.compress_size
        # Uncompressed bytes still to be handed to the caller.
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Push the bytes just read back in front of the buffer.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # The request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than asked for: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            # Keep going only until one call actually produces data.
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read (and decrypt, if needed) raw bytes from the underlying file:
        # at least MIN_READ_SIZE, but never past the end of the member.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            # The file ended before compress_size bytes were delivered.
            raise EOFError

        if self._decrypter is not None:
            data = bytes(map(self._decrypter, data))
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000936
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000937
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000938class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000939 """ Class with methods to open, read, write, close, list zip files.
940
Serhiy Storchaka235c5e02013-11-23 15:55:38 +0200941 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +0000942
Fred Drake3d9091e2001-03-26 15:49:24 +0000943 file: Either the path to the file, or a file-like object.
944 If it is a path, the file will be opened and closed by ZipFile.
945 mode: The mode can be either read "r", write "w" or append "a".
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200946 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
947 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000948 allowZip64: if True ZipFile will create files with ZIP64 extensions when
949 needed, otherwise it will raise an exception when this would
950 be necessary.
951
Fred Drake3d9091e2001-03-26 15:49:24 +0000952 """
Fred Drake484d7352000-10-02 21:14:52 +0000953
Fred Drake90eac282001-02-28 05:29:34 +0000954 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -0800955 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +0000956
Serhiy Storchaka235c5e02013-11-23 15:55:38 +0200957 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
Fred Drake484d7352000-10-02 21:14:52 +0000958 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000959 if mode not in ("r", "w", "a"):
960 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
961
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200962 _check_compression(compression)
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000963
964 self._allowZip64 = allowZip64
965 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000966 self.debug = 0 # Level of printing: 0 through 3
967 self.NameToInfo = {} # Find file info given name
968 self.filelist = [] # List of ZipInfo instances for archive
969 self.compression = compression # Method of compression
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200970 self.mode = mode
Thomas Wouterscf297e42007-02-23 15:07:44 +0000971 self.pwd = None
R David Murrayf50b38a2012-04-12 18:44:58 -0400972 self._comment = b''
Tim Petersa19a1682001-03-29 04:36:09 +0000973
Fred Drake3d9091e2001-03-26 15:49:24 +0000974 # Check if we were passed a file-like object
Guido van Rossum3172c5d2007-10-16 18:12:55 +0000975 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000976 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +0000977 self._filePassed = 0
978 self.filename = file
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200979 modeDict = {'r' : 'rb', 'w': 'w+b', 'a' : 'r+b',
980 'r+b': 'w+b', 'w+b': 'wb'}
981 filemode = modeDict[mode]
982 while True:
983 try:
984 self.fp = io.open(file, filemode)
985 except OSError:
986 if filemode in modeDict:
987 filemode = modeDict[filemode]
988 continue
Thomas Wouterscf297e42007-02-23 15:07:44 +0000989 raise
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200990 break
Fred Drake3d9091e2001-03-26 15:49:24 +0000991 else:
992 self._filePassed = 1
993 self.fp = file
994 self.filename = getattr(file, 'name', None)
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200995 self._fileRefCnt = 1
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200996 self._lock = threading.RLock()
Tim Petersa19a1682001-03-29 04:36:09 +0000997
Antoine Pitrou17babc52012-11-17 23:50:08 +0100998 try:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200999 if mode == 'r':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001000 self._RealGetContents()
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001001 elif mode == 'w':
Georg Brandl268e4d42010-10-14 06:59:45 +00001002 # set the modified flag so central directory gets written
1003 # even if no files are added to the archive
1004 self._didModify = True
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001005 self.start_dir = 0
1006 elif mode == 'a':
Antoine Pitrou17babc52012-11-17 23:50:08 +01001007 try:
1008 # See if file is a zip file
1009 self._RealGetContents()
1010 # seek to start of directory and overwrite
1011 self.fp.seek(self.start_dir, 0)
1012 except BadZipFile:
1013 # file is not a zip file, just append
1014 self.fp.seek(0, 2)
1015
1016 # set the modified flag so central directory gets written
1017 # even if no files are added to the archive
1018 self._didModify = True
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001019 self.start_dir = self.fp.tell()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001020 else:
1021 raise RuntimeError('Mode must be "r", "w" or "a"')
1022 except:
1023 fp = self.fp
1024 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001025 self._fpclose(fp)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001026 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001027
Ezio Melottifaa6b7f2009-12-30 12:34:59 +00001028 def __enter__(self):
1029 return self
1030
1031 def __exit__(self, type, value, traceback):
1032 self.close()
1033
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001034 def __repr__(self):
1035 result = ['<%s.%s' % (self.__class__.__module__,
1036 self.__class__.__qualname__)]
1037 if self.fp is not None:
1038 if self._filePassed:
1039 result.append(' file=%r' % self.fp)
1040 elif self.filename is not None:
1041 result.append(' filename=%r' % self.filename)
1042 result.append(' mode=%r' % self.mode)
1043 else:
1044 result.append(' [closed]')
1045 result.append('>')
1046 return ''.join(result)
1047
Tim Peters7d3bad62001-04-04 18:56:49 +00001048 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +00001049 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001050 fp = self.fp
Georg Brandl268e4d42010-10-14 06:59:45 +00001051 try:
1052 endrec = _EndRecData(fp)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001053 except OSError:
Georg Brandl4d540882010-10-28 06:42:33 +00001054 raise BadZipFile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001055 if not endrec:
Georg Brandl4d540882010-10-28 06:42:33 +00001056 raise BadZipFile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001057 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001058 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001059 size_cd = endrec[_ECD_SIZE] # bytes in central directory
1060 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
R David Murrayf50b38a2012-04-12 18:44:58 -04001061 self._comment = endrec[_ECD_COMMENT] # archive comment
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001062
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001063 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001064 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +00001065 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1066 # If Zip64 extension structures are present, account for them
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001067 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
1068
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001069 if self.debug > 2:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001070 inferred = concat + offset_cd
1071 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001072 # self.start_dir: Position of start of central directory
1073 self.start_dir = offset_cd + concat
1074 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001075 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001076 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001077 total = 0
1078 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001079 centdir = fp.read(sizeCentralDir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001080 if len(centdir) != sizeCentralDir:
1081 raise BadZipFile("Truncated central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001082 centdir = struct.unpack(structCentralDir, centdir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001083 if centdir[_CD_SIGNATURE] != stringCentralDir:
1084 raise BadZipFile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001085 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001086 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +00001087 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001088 flags = centdir[5]
1089 if flags & 0x800:
1090 # UTF-8 file names extension
1091 filename = filename.decode('utf-8')
1092 else:
1093 # Historical ZIP filename encoding
1094 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001095 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001096 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +00001097 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1098 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001099 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001100 (x.create_version, x.create_system, x.extract_version, x.reserved,
Christian Tismer59202e52013-10-21 03:59:23 +02001101 x.flag_bits, x.compress_type, t, d,
1102 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
Martin v. Löwisd099b562012-05-01 14:08:22 +02001103 if x.extract_version > MAX_EXTRACT_VERSION:
1104 raise NotImplementedError("zip file version %.1f" %
1105 (x.extract_version / 10))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001106 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1107 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +00001108 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001109 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Christian Tismer59202e52013-10-21 03:59:23 +02001110 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001111
1112 x._decodeExtra()
1113 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001114 self.filelist.append(x)
1115 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001116
1117 # update total bytes read from central directory
1118 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1119 + centdir[_CD_EXTRA_FIELD_LENGTH]
1120 + centdir[_CD_COMMENT_LENGTH])
1121
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001122 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001123 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001124
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001125
1126 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001127 """Return a list of file names in the archive."""
Ezio Melotti006917e2012-04-16 21:34:24 -06001128 return [data.filename for data in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001129
1130 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001131 """Return a list of class ZipInfo instances for files in the
1132 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001133 return self.filelist
1134
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001135 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +00001136 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001137 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
1138 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001139 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001140 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001141 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1142 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001143
1144 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001145 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001146 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001147 for zinfo in self.filelist:
1148 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001149 # Read by chunks, to avoid an OverflowError or a
1150 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001151 with self.open(zinfo.filename, "r") as f:
1152 while f.read(chunk_size): # Check CRC-32
1153 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001154 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001155 return zinfo.filename
1156
1157 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +00001158 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001159 info = self.NameToInfo.get(name)
1160 if info is None:
1161 raise KeyError(
1162 'There is no item named %r in the archive' % name)
1163
1164 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001165
Thomas Wouterscf297e42007-02-23 15:07:44 +00001166 def setpassword(self, pwd):
1167 """Set default password for encrypted files."""
R. David Murray8d855d82010-12-21 21:53:37 +00001168 if pwd and not isinstance(pwd, bytes):
1169 raise TypeError("pwd: expected bytes, got %s" % type(pwd))
1170 if pwd:
1171 self.pwd = pwd
1172 else:
1173 self.pwd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001174
R David Murrayf50b38a2012-04-12 18:44:58 -04001175 @property
1176 def comment(self):
1177 """The comment text associated with the ZIP file."""
1178 return self._comment
1179
1180 @comment.setter
1181 def comment(self, comment):
1182 if not isinstance(comment, bytes):
1183 raise TypeError("comment: expected bytes, got %s" % type(comment))
1184 # check for valid comment length
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001185 if len(comment) > ZIP_MAX_COMMENT:
1186 import warnings
1187 warnings.warn('Archive comment is too long; truncating to %d bytes'
1188 % ZIP_MAX_COMMENT, stacklevel=2)
R David Murrayf50b38a2012-04-12 18:44:58 -04001189 comment = comment[:ZIP_MAX_COMMENT]
1190 self._comment = comment
1191 self._didModify = True
1192
Thomas Wouterscf297e42007-02-23 15:07:44 +00001193 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +00001194 """Return file bytes (as a string) for name."""
Benjamin Petersond285bdb2010-10-31 17:57:22 +00001195 with self.open(name, "r", pwd) as fp:
1196 return fp.read()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001197
1198 def open(self, name, mode="r", pwd=None):
1199 """Return file-like object for 'name'."""
1200 if mode not in ("r", "U", "rU"):
Collin Winterce36ad82007-08-30 01:19:48 +00001201 raise RuntimeError('open() requires mode "r", "U", or "rU"')
Serhiy Storchaka6787a382013-11-23 22:12:06 +02001202 if 'U' in mode:
1203 import warnings
1204 warnings.warn("'U' mode is deprecated",
1205 DeprecationWarning, 2)
R. David Murray8d855d82010-12-21 21:53:37 +00001206 if pwd and not isinstance(pwd, bytes):
1207 raise TypeError("pwd: expected bytes, got %s" % type(pwd))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001208 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001209 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02001210 "Attempt to read ZIP archive that was already closed")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001211
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001212 # Make sure we have an info object
1213 if isinstance(name, ZipInfo):
1214 # 'name' is already an info object
1215 zinfo = name
Guido van Rossumd8faa362007-04-27 19:54:29 +00001216 else:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001217 # Get info object for name
1218 zinfo = self.getinfo(name)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001219
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001220 self._fileRefCnt += 1
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001221 zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose, self._lock)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001222 try:
Antoine Pitrou17babc52012-11-17 23:50:08 +01001223 # Skip the file header:
1224 fheader = zef_file.read(sizeFileHeader)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001225 if len(fheader) != sizeFileHeader:
1226 raise BadZipFile("Truncated file header")
1227 fheader = struct.unpack(structFileHeader, fheader)
1228 if fheader[_FH_SIGNATURE] != stringFileHeader:
Antoine Pitrou17babc52012-11-17 23:50:08 +01001229 raise BadZipFile("Bad magic number for file header")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001230
Antoine Pitrou17babc52012-11-17 23:50:08 +01001231 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
1232 if fheader[_FH_EXTRA_FIELD_LENGTH]:
1233 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001234
Antoine Pitrou8572da52012-11-17 23:52:05 +01001235 if zinfo.flag_bits & 0x20:
1236 # Zip 2.7: compressed patched data
1237 raise NotImplementedError("compressed patched data (flag bit 5)")
Martin v. Löwis2a2ce322012-05-01 08:44:08 +02001238
Antoine Pitrou8572da52012-11-17 23:52:05 +01001239 if zinfo.flag_bits & 0x40:
1240 # strong encryption
1241 raise NotImplementedError("strong encryption (flag bit 6)")
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001242
Antoine Pitrou17babc52012-11-17 23:50:08 +01001243 if zinfo.flag_bits & 0x800:
1244 # UTF-8 filename
1245 fname_str = fname.decode("utf-8")
1246 else:
1247 fname_str = fname.decode("cp437")
Georg Brandl5ba11de2011-01-01 10:09:32 +00001248
Antoine Pitrou17babc52012-11-17 23:50:08 +01001249 if fname_str != zinfo.orig_filename:
1250 raise BadZipFile(
1251 'File name in directory %r and header %r differ.'
1252 % (zinfo.orig_filename, fname))
1253
1254 # check for encrypted flag & handle password
1255 is_encrypted = zinfo.flag_bits & 0x1
1256 zd = None
1257 if is_encrypted:
1258 if not pwd:
1259 pwd = self.pwd
1260 if not pwd:
1261 raise RuntimeError("File %s is encrypted, password "
1262 "required for extraction" % name)
1263
1264 zd = _ZipDecrypter(pwd)
1265 # The first 12 bytes in the cypher stream is an encryption header
1266 # used to strengthen the algorithm. The first 11 bytes are
1267 # completely random, while the 12th contains the MSB of the CRC,
1268 # or the MSB of the file time depending on the header type
1269 # and is used to check the correctness of the password.
1270 header = zef_file.read(12)
1271 h = list(map(zd, header[0:12]))
1272 if zinfo.flag_bits & 0x8:
1273 # compare against the file type from extended local headers
1274 check_byte = (zinfo._raw_time >> 8) & 0xff
1275 else:
1276 # compare against the CRC otherwise
1277 check_byte = (zinfo.CRC >> 24) & 0xff
1278 if h[11] != check_byte:
1279 raise RuntimeError("Bad password for file", name)
1280
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001281 return ZipExtFile(zef_file, mode, zinfo, zd, True)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001282 except:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001283 zef_file.close()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001284 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001285
Christian Heimes790c8232008-01-07 21:14:23 +00001286 def extract(self, member, path=None, pwd=None):
1287 """Extract a member from the archive to the current working directory,
1288 using its full name. Its file information is extracted as accurately
1289 as possible. `member' may be a filename or a ZipInfo object. You can
1290 specify a different directory using `path'.
1291 """
1292 if not isinstance(member, ZipInfo):
1293 member = self.getinfo(member)
1294
1295 if path is None:
1296 path = os.getcwd()
1297
1298 return self._extract_member(member, path, pwd)
1299
1300 def extractall(self, path=None, members=None, pwd=None):
1301 """Extract all members from the archive to the current working
1302 directory. `path' specifies a different directory to extract to.
1303 `members' is optional and must be a subset of the list returned
1304 by namelist().
1305 """
1306 if members is None:
1307 members = self.namelist()
1308
1309 for zipinfo in members:
1310 self.extract(zipinfo, path, pwd)
1311
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001312 @classmethod
1313 def _sanitize_windows_name(cls, arcname, pathsep):
1314 """Replace bad characters and remove trailing dots from parts."""
1315 table = cls._windows_illegal_name_trans_table
1316 if not table:
1317 illegal = ':<>|"?*'
1318 table = str.maketrans(illegal, '_' * len(illegal))
1319 cls._windows_illegal_name_trans_table = table
1320 arcname = arcname.translate(table)
1321 # remove trailing dots
1322 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1323 # rejoin, removing empty parts.
1324 arcname = pathsep.join(x for x in arcname if x)
1325 return arcname
1326
def _extract_member(self, member, targetpath, pwd):
    """Write the ZipInfo object 'member' to disk below 'targetpath'.

    The member name is turned into a relative, platform-specific path
    first.  Returns the normalized path of the file or directory that
    was written.
    """
    sep = os.path.sep
    # Archive names use forward slashes; map them (and the alternative
    # separator, if the platform has one) onto the platform separator.
    relname = member.filename.replace('/', sep)
    if os.path.altsep:
        relname = relname.replace(os.path.altsep, sep)

    # Interpret absolute names as relative: drop the drive letter or UNC
    # prefix, then discard empty, "." and ".." components.
    relname = os.path.splitdrive(relname)[1]
    skipped = ('', os.path.curdir, os.path.pardir)
    relname = sep.join(
        [part for part in relname.split(sep) if part not in skipped])
    if sep == '\\':
        # Windows additionally forbids some characters in file names.
        relname = self._sanitize_windows_name(relname, sep)

    destination = os.path.normpath(os.path.join(targetpath, relname))

    # Make sure the containing directory exists.
    parent = os.path.dirname(destination)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    # A name ending in '/' denotes a directory entry: there is no data
    # to copy, only the directory itself to create.
    if member.filename[-1] == '/':
        if not os.path.isdir(destination):
            os.mkdir(destination)
        return destination

    with self.open(member, pwd=pwd) as source, \
            open(destination, "wb") as target:
        shutil.copyfileobj(source, target)
    return destination
1365
def _writecheck(self, zinfo):
    """Check that `zinfo' can be written to this archive.

    Emits a warning when the member name is already present, raises
    RuntimeError when the archive is not in mode "w"/"a" or has been
    closed, validates the compression type, and raises LargeZipFile when
    ZIP64 is not allowed but the entry would need it.
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)

    if self._allowZip64:
        return
    # Without ZIP64 the first limit that is exceeded names the error.
    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        reason = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        reason = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        reason = "Zipfile size"
    else:
        return
    raise LargeZipFile(reason + " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001388
def write(self, filename, arcname=None, compress_type=None):
    """Add the file or directory `filename' to the archive as `arcname'.

    `arcname' defaults to `filename'; its drive prefix and leading
    separators are removed.  A directory is stored as an empty entry
    whose name ends in '/'.  `compress_type' overrides the archive's
    default compression for this entry.  Raises RuntimeError if the
    archive has already been closed.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    file_stat = os.stat(filename)
    isdir = stat.S_ISDIR(file_stat.st_mode)
    date_time = time.localtime(file_stat.st_mtime)[0:6]

    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (file_stat.st_mode & 0xFFFF) << 16  # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)
    zinfo.file_size = file_stat.st_size
    zinfo.flag_bits = 0x00

    with self._lock:
        self.fp.seek(self.start_dir, 0)
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            # Directory entries carry a header only, no data.
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            self.fp.write(zinfo.FileHeader(False))
            self.start_dir = self.fp.tell()
            return

        compressor = _get_compressor(zinfo.compress_type)
        with open(filename, "rb") as src:
            # Placeholder CRC and size; the header is rewritten below
            # once the real values are known.
            zinfo.CRC = 0
            zinfo.compress_size = 0
            running_crc = 0
            packed_size = 0
            # Compressed size can be larger than uncompressed size
            zip64 = (self._allowZip64 and
                     zinfo.file_size * 1.05 > ZIP64_LIMIT)
            self.fp.write(zinfo.FileHeader(zip64))
            raw_size = 0
            while True:
                chunk = src.read(1024 * 8)
                if not chunk:
                    break
                raw_size += len(chunk)
                running_crc = crc32(chunk, running_crc) & 0xffffffff
                if compressor:
                    chunk = compressor.compress(chunk)
                    packed_size += len(chunk)
                self.fp.write(chunk)
        if compressor:
            tail = compressor.flush()
            packed_size += len(tail)
            self.fp.write(tail)
            zinfo.compress_size = packed_size
        else:
            zinfo.compress_size = raw_size
        zinfo.CRC = running_crc
        zinfo.file_size = raw_size
        if not zip64 and self._allowZip64:
            # The header was written without ZIP64 fields, so sizes that
            # now exceed the limit cannot be recorded.
            if raw_size > ZIP64_LIMIT:
                raise RuntimeError('File size has increased during compressing')
            if packed_size > ZIP64_LIMIT:
                raise RuntimeError('Compressed size larger than uncompressed size')
        # Seek backwards and write file header (which will now include
        # correct CRC and file sizes)
        self.start_dir = self.fp.tell()     # Preserve current position in file
        self.fp.seek(zinfo.header_offset, 0)
        self.fp.write(zinfo.FileHeader(zip64))
        self.fp.seek(self.start_dir, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001480
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write `data' into the archive as a single member.

    `data' may be either a 'str' or a 'bytes' instance; if it is a
    'str', it is encoded as UTF-8 first.
    `zinfo_or_arcname' is either a ZipInfo instance or the name of the
    file in the archive.  For a plain name a ZipInfo is created with the
    current local time and the archive's default compression; a name
    ending in '/' gets directory attributes, any other name gets file
    mode 0o600.
    `compress_type', when not None, overrides the compression recorded
    in the ZipInfo.

    Raises RuntimeError if the archive has already been closed and
    LargeZipFile if the entry needs ZIP64 but ZIP64 is not allowed.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        if zinfo.filename[-1] == '/':
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)            # Uncompressed size
    with self._lock:
        try:
            self.fp.seek(self.start_dir)
        except (AttributeError, io.UnsupportedOperation):
            # Some file-like objects can provide tell() but not seek()
            pass
        # Record the header position and apply the per-call compression
        # override exactly once.
        zinfo.header_offset = self.fp.tell()    # Start of header data
        if compress_type is not None:
            zinfo.compress_type = compress_type
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True
        zinfo.CRC = crc32(data) & 0xffffffff       # CRC-32 checksum
        co = _get_compressor(zinfo.compress_type)
        if co:
            data = co.compress(data) + co.flush()
            zinfo.compress_size = len(data)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        zip64 = zinfo.file_size > ZIP64_LIMIT or \
            zinfo.compress_size > ZIP64_LIMIT
        if zip64 and not self._allowZip64:
            raise LargeZipFile("Filesize would require ZIP64 extensions")
        self.fp.write(zinfo.FileHeader(zip64))
        self.fp.write(data)
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            fmt = '<LQQ' if zip64 else '<LLL'
            self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
                                      zinfo.file_size))
        self.fp.flush()
        self.start_dir = self.fp.tell()
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001546
def __del__(self):
    """Finalizer: call close() in case the user never did."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001550
def close(self):
    """Close the archive; in mode "w" or "a" write the ending records first.

    Does nothing when the archive is already closed.  The underlying
    file reference is released through _fpclose() even if writing the
    ending records raises.
    """
    if self.fp is None:
        return

    needs_end_record = self.mode in ("w", "a") and self._didModify
    try:
        if needs_end_record:
            with self._lock:
                try:
                    self.fp.seek(self.start_dir)
                except (AttributeError, io.UnsupportedOperation):
                    # Some file-like objects can provide tell() but not seek()
                    pass
                self._write_end_record()
    finally:
        # Detach the handle before releasing it so that a second close()
        # returns immediately.
        handle, self.fp = self.fp, None
        self._fpclose(handle)
1570
def _write_end_record(self):
    """Write the central directory and the end-of-archive records.

    One central directory entry is written to self.fp for every member
    in self.filelist, followed by the ZIP64 end records when a limit is
    exceeded, the regular end-of-archive record and the archive comment.
    Raises LargeZipFile when ZIP64 records are required but not allowed.
    """
    for zinfo in self.filelist:         # write central directory
        dt = zinfo.date_time
        dosdate = ((dt[0] - 1980) << 9) | (dt[1] << 5) | dt[2]
        dostime = (dt[3] << 11) | (dt[4] << 5) | (dt[5] // 2)

        # Values too large for the 32-bit fields are collected here and
        # the fixed field is set to 0xffffffff instead.
        zip64_fields = []
        if (zinfo.file_size > ZIP64_LIMIT
                or zinfo.compress_size > ZIP64_LIMIT):
            zip64_fields += [zinfo.file_size, zinfo.compress_size]
            file_size = compress_size = 0xffffffff
        else:
            file_size = zinfo.file_size
            compress_size = zinfo.compress_size

        if zinfo.header_offset > ZIP64_LIMIT:
            zip64_fields.append(zinfo.header_offset)
            header_offset = 0xffffffff
        else:
            header_offset = zinfo.header_offset

        extra_data = zinfo.extra
        min_version = 0
        if zip64_fields:
            # Append a ZIP64 field to the extra's
            extra_data = struct.pack(
                '<HH' + 'Q' * len(zip64_fields),
                1, 8 * len(zip64_fields), *zip64_fields) + extra_data
            min_version = ZIP64_VERSION

        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version,
                                  zinfo.reserved, flag_bits,
                                  zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data),
                                  len(zinfo.comment), 0,
                                  zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
        except DeprecationWarning:
            # Dump the values that were being packed, then re-raise.
            print((structCentralDir, stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset), file=sys.stderr)
            raise
        self.fp.write(centdir)
        self.fp.write(filename)
        self.fp.write(extra_data)
        self.fp.write(zinfo.comment)

    dir_end = self.fp.tell()
    # Write end-of-zip-archive record
    entry_count = len(self.filelist)
    dir_size = dir_end - self.start_dir
    dir_offset = self.start_dir
    if entry_count > ZIP_FILECOUNT_LIMIT:
        requires_zip64 = "Files count"
    elif dir_offset > ZIP64_LIMIT:
        requires_zip64 = "Central directory offset"
    elif dir_size > ZIP64_LIMIT:
        requires_zip64 = "Central directory size"
    else:
        requires_zip64 = None

    if requires_zip64:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
        self.fp.write(struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, entry_count, entry_count,
            dir_size, dir_offset))
        self.fp.write(struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, dir_end, 1))
        # The regular end record can only hold the clamped values.
        entry_count = min(entry_count, 0xFFFF)
        dir_size = min(dir_size, 0xFFFFFFFF)
        dir_offset = min(dir_offset, 0xFFFFFFFF)

    self.fp.write(struct.pack(structEndArchive, stringEndArchive,
                              0, 0, entry_count, entry_count,
                              dir_size, dir_offset, len(self._comment)))
    self.fp.write(self._comment)
    self.fp.flush()
1671
def _fpclose(self, fp):
    """Drop one reference to the underlying file and close it when unused.

    `fp' is closed only when the reference count reaches zero and the
    file was not passed in by the caller (self._filePassed is false).
    """
    assert self._fileRefCnt > 0
    self._fileRefCnt -= 1
    if self._fileRefCnt or self._filePassed:
        return
    fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001677
1678
class PyZipFile(ZipFile):
    """ZipFile subclass for archiving Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember `optimize'.

        `optimize' selects how _get_codename() picks byte-code files:
        -1 uses whatever up-to-date byte-code is present, any other
        value is passed to py_compile as the optimization level.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add the Python modules found at "pathname" to the ZIP archive.

        If pathname is a package directory (it contains __init__.py),
        the package and all its sub-packages are added recursively.  If
        pathname is a plain directory, the *.py files directly inside it
        are added at top level.  Otherwise pathname must be a *.py file,
        and that module is added; anything else raises RuntimeError.
        Modules are stored as byte-code (module.pyc or module.pyo),
        compiling if necessary; the .py source is stored only when
        compilation fails.
        If filterfunc is given, it is called with each path before the
        path is added; a false result skips that file or directory.
        """
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s "%s" skipped by filterfunc' % (label, pathname))
            return
        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for entry in os.listdir(pathname):
                path = os.path.join(pathname, entry)
                if os.path.splitext(entry)[1] != ".py":
                    continue
                if filterfunc and not filterfunc(path):
                    if self.debug:
                        print('file "%s" skipped by filterfunc' % path)
                    continue
                fname, arcname = self._get_codename(path[:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
            return

        # This is a package directory, add it
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)

        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for entry in entries:
            path = os.path.join(pathname, entry)
            ext = os.path.splitext(entry)[1]
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename,
                                 filterfunc=filterfunc)  # Recursive call
            elif ext == ".py":
                if filterfunc and not filterfunc(path):
                    if self.debug:
                        print('file "%s" skipped by filterfunc' % path)
                    continue
                fname, arcname = self._get_codename(path[:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the module at `pathname'.

        `pathname' is the module path without its ".py" suffix.  The
        returned filename is the file to read from disk (byte-code if
        available or compilable, otherwise the .py source); archivename
        is the last component of the legacy .pyc/.pyo/.py name, prefixed
        with "basename/" when basename is not empty.
        """
        def _compile(file, optimize=-1):
            # Byte-compile `file'; report failure instead of raising.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = importlib.util.cache_from_source(file_py, True)
        pycache_pyo = importlib.util.cache_from_source(file_py, False)

        def _is_current(candidate):
            # True when `candidate' exists and is not older than the source.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present.  Candidates are
            # tried in order; __pycache__ files are stored under the
            # legacy .pyc/.pyo name in the archive.
            candidates = (
                (file_pyo, file_pyo),
                (file_pyc, file_pyc),
                (pycache_pyc, file_pyc),
                (pycache_pyo, file_pyo),
            )
            for fname, arcname in candidates:
                if _is_current(fname):
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    fname = (pycache_pyc if __debug__ else pycache_pyo)
                    arcname = (file_pyc if __debug__ else file_pyo)
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname, arcname = pycache_pyc, file_pyc
            else:
                fname, arcname = pycache_pyo, file_pyo
            if not _is_current(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py

        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001837
1838
def main(args = None):
    """Command-line entry point: list, test, extract or create a zipfile.

    `args' defaults to sys.argv[1:].  On a missing or unknown option, or
    a wrong number of arguments, the usage text is printed and the
    process exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    def usage_exit():
        # Shared failure path for every argument error.
        print(USAGE)
        sys.exit(1)

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_exit()

    option = args[0]
    if option == '-l':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif option == '-t':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif option == '-e':
        if len(args) != 3:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif option == '-c':
        if len(args) < 3:
            usage_exit()

        def addToZip(zf, path, zippath):
            # Files are stored deflated; directories get their own entry
            # (when they have a name) and are then walked recursively.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                if zippath:
                    zf.write(path, zippath)
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w') as zf:
            for path in args[2:]:
                zippath = os.path.basename(path)
                if not zippath:
                    # Path ended with a separator; name it after its
                    # last directory component instead.
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                addToZip(zf, path, zippath)


if __name__ == "__main__":
    main()