blob: 85bdaa99ae8789ef2bb175cf54419c2dc9521e6c [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Brett Cannonb57a0852013-06-15 17:32:30 -04009import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +000010import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
Serhiy Storchakaf15e5242015-01-26 13:53:38 +020016import threading
Barry Warsaw28a691b2010-04-17 00:19:56 +000017
Guido van Rossum32abe6f2000-03-31 17:30:02 +000018
19try:
Tim Peterse1190062001-01-15 03:34:38 +000020 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000021 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040022except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000023 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000024 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000025
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020026try:
27 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040028except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020029 bz2 = None
30
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020031try:
32 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040033except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020034 lzma = None
35
# Public API of this module (controls ``from zipfile import *``).
__all__ = [
    "BadZipFile",
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "ZIP_BZIP2",
    "ZIP_LZMA",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
    "LargeZipFile",
]
Skip Montanaro40fc1602001-03-01 04:27:19 +000039
class BadZipFile(Exception):
    """Raised when an archive is invalid or unsupported (e.g. multi-disk)."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +000042
43
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that requires ZIP64 extensions
    while those extensions are disabled.
    """
49
# Both aliases refer to the very same class object as BadZipFile, so code
# that catches either of the old names keeps working.
error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names
51
Guido van Rossum32abe6f2000-03-31 17:30:02 +000052
# Sizes above ZIP64_LIMIT make ZipInfo.FileHeader() switch to (or demand) the
# ZIP64 extra field.
ZIP64_LIMIT = (1 << 31) - 1
# Largest values representable in 16-bit header fields.
# NOTE(review): presumably the entry-count and archive-comment-length caps;
# their users are not visible in this part of the file.
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1
Thomas Wouters0e3f5912006-08-11 14:57:12 +000056
# constants for Zip file compression methods
# (the numeric values are the method ids stored in the archive headers; the
# same ids are the keys of compressor_names)
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported
63
# "Version" numbers written into headers.  ZipInfo starts with
# DEFAULT_VERSION and FileHeader() raises it to the matching value below when
# ZIP64, bzip2 or LZMA is used for the member.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020070
Martin v. Löwisb09b8442008-07-03 14:13:42 +000071# Below are some formats and associated data for reading/writing headers using
72# the struct module. The names and structures of headers/records are those used
73# in the PKWARE description of the ZIP file format:
74# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
75# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000076
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# Layout (little-endian): 4-byte signature, four unsigned 16-bit fields,
# two unsigned 32-bit fields, one unsigned 16-bit field -> 22 bytes.
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the unpacked fields in the "endrec" list.
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData() appends the comment bytes and the record's file offset).
_ECD_COMMENT = 8
_ECD_LOCATION = 9
95
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
# Layout (little-endian): 4-byte signature, 4 x uint8, 4 x uint16,
# 1 x uint32, 2 x uint32, 5 x uint16, 2 x uint32 -> 46 bytes, 19 fields.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
# (one index per field of structCentralDir, in unpack order)
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
122
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# Layout (little-endian): 4-byte signature, 2 x uint8, 4 x uint16,
# 1 x uint32, 2 x uint32, 2 x uint16 -> 30 bytes, 12 fields.  This is the
# format ZipInfo.FileHeader() packs.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields in an unpacked local file header.
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
141
# The "Zip64 end of central directory locator" structure, magic number, and size
# Layout (little-endian): 4-byte signature, uint32, uint64, uint32 -> 20
# bytes; _EndRecData64() unpacks it as (sig, diskno, reloff, disks).
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000146
# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# Layout (little-endian): 4-byte signature, uint64, 2 x uint16, 2 x uint32,
# 4 x uint64 -> 56 bytes, 10 fields.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields in an unpacked Zip64 end-of-central-directory record.
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
163
def _check_zipfile(fp):
    """Return True if an "end of central directory" record is found in *fp*.

    An OSError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        endrec = _EndRecData(fp)
    except OSError:
        return False
    # _EndRecData() yields a (non-empty) list on success and None otherwise.
    return bool(endrec)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000171
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    *filename* may be a path, or an already-open file or file-like object
    (anything exposing a ``read`` attribute).  Returns False when the file
    cannot be opened or read.
    """
    looks_like_zip = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as stream:
                looks_like_zip = _check_zipfile(stream)
        else:
            looks_like_zip = _check_zipfile(fp=filename)
    except OSError:
        # Best effort: an unreadable file is reported as "not a ZIP file".
        pass
    return looks_like_zip
187
def _EndRecData64(fpin, offset, endrec):
    """Update *endrec* from the ZIP64 end-of-archive records, if present.

    *offset* is the (negative) position of the classic "end of central
    directory" record relative to the end of the file.  *endrec* is returned
    unchanged when no valid ZIP64 locator/record precedes it; otherwise its
    first seven entries are overwritten in place with the ZIP64 values.

    Raises BadZipFile for archives spanning several disks.
    """
    locator_offset = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_offset, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    raw = fpin.read(sizeEndCentDir64Locator)
    if len(raw) != sizeEndCentDir64Locator:
        return endrec
    locator_sig, diskno, _reloff, disks = struct.unpack(
        structEndArchive64Locator, raw)
    if locator_sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data': the record sits directly in front
    # of the locator.
    fpin.seek(locator_offset - sizeEndCentDir64, 2)
    raw = fpin.read(sizeEndCentDir64)
    if len(raw) != sizeEndCentDir64:
        return endrec
    record = struct.unpack(structEndArchive64, raw)
    if record[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    field_map = (
        (_ECD_SIGNATURE, _CD64_SIGNATURE),
        (_ECD_DISK_NUMBER, _CD64_DISK_NUMBER),
        (_ECD_DISK_START, _CD64_DISK_NUMBER_START),
        (_ECD_ENTRIES_THIS_DISK, _CD64_NUMBER_ENTRIES_THIS_DISK),
        (_ECD_ENTRIES_TOTAL, _CD64_NUMBER_ENTRIES_TOTAL),
        (_ECD_SIZE, _CD64_DIRECTORY_SIZE),
        (_ECD_OFFSET, _CD64_OFFSET_START_CENTDIR),
    )
    for ecd_index, cd64_index in field_map:
        endrec[ecd_index] = record[cd64_index]
    return endrec
229
230
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list: the eight unpacked fields of the ZIP "End of
    central dir" record, then the archive comment (bytes), then the file
    seek offset of the record.  ZIP64 data, when present, replaces the
    corresponding leading fields."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: with no archive comment the record is the very last thing
    # in the file.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    comment_free_record = (len(tail) == sizeEndCentDir and
                           tail[0:4] == stringEndArchive and
                           tail[-2:] == b"\000\000")
    if comment_free_record:
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, tail))

        # Append a blank comment and record start offset
        endrec.extend([b"", filesize - sizeEndCentDir])

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    window = fpin.read()
    sig_pos = window.rfind(stringEndArchive)
    if sig_pos < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    record_end = sig_pos + sizeEndCentDir
    recData = window[sig_pos:record_end]
    if len(recData) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, recData))
    commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    endrec.append(window[record_end:record_end + commentSize])
    endrec.append(maxCommentStart + sig_pos)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, maxCommentStart + sig_pos - filesize,
                         endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000290
Fred Drake484d7352000-10-02 21:14:52 +0000291
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename  -- member name; truncated at the first NUL character and
                     normalized to use "/" as the directory separator.
        date_time -- (year, month, day, hour, min, sec) tuple.

        Raises ValueError if the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Return a compact description; default-looking fields are omitted."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The upper 16 bits are rendered as a file mode, the lower 16 bits
        # as a raw hexadecimal value.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.filename[-1:] == '/'
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file (local) header as bytes.

        zip64 -- True to always add the ZIP64 extra field, False to forbid
                 it, None (default) to add it only when a size exceeds
                 ZIP64_LIMIT.

        Side effect: extract_version and create_version are raised to the
        minimum required by ZIP64 and by the compression method.

        Raises LargeZipFile when a size exceeds ZIP64_LIMIT but zip64 is
        false.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # Extra field id 1, payload = two 64-bit sizes.
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        ASCII is used when possible; otherwise the name is encoded as UTF-8
        and flag bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 extra field (id 1), if any, to this entry.

        Walks the (id, length, payload) records in self.extra.  For a ZIP64
        record, file_size, compress_size and header_offset are replaced, in
        that order, by the stored 64-bit values wherever the attribute
        currently holds the 0xFFFFFFFF placeholder.  Records with any other
        id are skipped.

        Raises RuntimeError when the ZIP64 record has an unexpected length,
        is truncated, or holds fewer values than the placeholders require.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    fmt, payload_size = '<QQQ', 24
                elif ln == 16:
                    fmt, payload_size = '<QQ', 16
                elif ln == 8:
                    fmt, payload_size = '<Q', 8
                elif ln == 0:
                    fmt, payload_size = None, 0
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                payload = extra[4:4 + payload_size]
                if len(payload) != payload_size:
                    # The declared length runs past the end of the data;
                    # report it like any other corrupt field instead of
                    # leaking a struct.error.
                    raise RuntimeError(
                        "Corrupt extra field %s (truncated)" % (ln,))
                counts = unpack(fmt, payload) if fmt else ()

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        self.file_size = counts[idx]
                        idx += 1

                    if self.compress_size == 0xFFFFFFFF:
                        self.compress_size = counts[idx]
                        idx += 1

                    if self.header_offset == 0xffffffff:
                        self.header_offset = counts[idx]
                        idx += 1
                except IndexError:
                    # Fewer 64-bit values than placeholder fields.
                    raise RuntimeError(
                        "Corrupt extra field %s (missing ZIP64 values)"
                        % (ln,)) from None

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000468
469
Thomas Wouterscf297e42007-02-23 15:07:44 +0000470class _ZipDecrypter:
471 """Class to handle decryption of files stored within a ZIP archive.
472
473 ZIP supports a password-based form of encryption. Even though known
474 plaintext attacks have been found against it, it is still useful
Christian Heimesfdab48e2008-01-20 09:06:41 +0000475 to be able to get data out of such a file.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000476
477 Usage:
478 zd = _ZipDecrypter(mypwd)
479 plain_char = zd(cypher_char)
480 plain_text = map(zd, cypher_text)
481 """
482
483 def _GenerateCRCTable():
484 """Generate a CRC-32 table.
485
486 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
487 internal keys. We noticed that a direct implementation is faster than
488 relying on binascii.crc32().
489 """
490 poly = 0xedb88320
491 table = [0] * 256
492 for i in range(256):
493 crc = i
494 for j in range(8):
495 if crc & 1:
496 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
497 else:
498 crc = ((crc >> 1) & 0x7FFFFFFF)
499 table[i] = crc
500 return table
Daniel Holth9dee3042014-01-02 23:17:21 -0500501 crctable = None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000502
503 def _crc32(self, ch, crc):
504 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000505 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000506
507 def __init__(self, pwd):
Daniel Holth9dee3042014-01-02 23:17:21 -0500508 if _ZipDecrypter.crctable is None:
509 _ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable()
Thomas Wouterscf297e42007-02-23 15:07:44 +0000510 self.key0 = 305419896
511 self.key1 = 591751049
512 self.key2 = 878082192
513 for p in pwd:
514 self._UpdateKeys(p)
515
516 def _UpdateKeys(self, c):
517 self.key0 = self._crc32(c, self.key0)
518 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
519 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000520 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000521
522 def __call__(self, c):
523 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000524 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000525 k = self.key2 | 2
526 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000527 self._UpdateKeys(c)
528 return c
529
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200530
class LZMACompressor:
    """Raw LZMA1 compressor that prefixes its output with a small header.

    The header is emitted once, in front of the first bytes returned by
    compress() or flush(): two bytes (9, 4), the 16-bit little-endian length
    of the filter properties, then the properties themselves.
    """

    def __init__(self):
        # The real compressor is created lazily so that the header is
        # produced together with the first output.
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        header = struct.pack('<BBH', 9, 4, len(props))
        return header + props

    def compress(self, data):
        """Compress *data*; the first call also returns the header."""
        if self._comp is not None:
            return self._comp.compress(data)
        header = self._init()
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream; emits the header first if nothing was written."""
        if self._comp is not None:
            return self._comp.flush()
        header = self._init()
        return header + self._comp.flush()
552
553
class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by the 4-byte + props header.

    Input is buffered until the header and at least one byte of payload are
    available; only then is the underlying decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Return the bytes decompressed from *data* (b'' while buffering)."""
        if self._decomp is None:
            self._unconsumed += data
            buffered = self._unconsumed
            if len(buffered) <= 4:
                return b''
            # Bytes 2-3 hold the little-endian size of the filter properties.
            psize, = struct.unpack('<H', buffered[2:4])
            payload_start = 4 + psize
            if len(buffered) <= payload_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, buffered[4:payload_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = buffered[payload_start:]
            # The buffer is only needed until the header has been parsed.
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
580
581
# Human-readable names for ZIP compression method ids; used when describing
# an entry or reporting a method this module cannot decompress.
compressor_names = {
    0: 'store', 1: 'shrink',
    2: 'reduce', 3: 'reduce', 4: 'reduce', 5: 'reduce',
    6: 'implode', 7: 'tokenize',
    8: 'deflate', 9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse', 19: 'lz77',
    97: 'wavpack', 98: 'ppmd',
}
601
def _check_compression(compression):
    """Validate that *compression* is a method this module can write.

    Raises RuntimeError when the method is unknown, or when the module that
    implements it (zlib, bz2 or lzma) could not be imported.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if zlib:
            return
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if compression == ZIP_BZIP2:
        if bz2:
            return
        raise RuntimeError(
            "Compression requires the (missing) bz2 module")
    if compression == ZIP_LZMA:
        if lzma:
            return
        raise RuntimeError(
            "Compression requires the (missing) lzma module")
    raise RuntimeError("That compression method is not supported")
619
620
def _get_compressor(compress_type):
    """Return a new compressor object for *compress_type*.

    None is returned for every method without a compressor here (which
    includes ZIP_STORED).
    """
    if compress_type == ZIP_DEFLATED:
        # Negative window bits: raw deflate stream, no zlib header/trailer.
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    if compress_type == ZIP_LZMA:
        return LZMACompressor()
    return None
631
632
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    ZIP_STORED needs no decompressor, so None is returned for it.  Any
    method not handled here raises NotImplementedError; the message includes
    the method's name when compressor_names knows it.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # Negative window bits: raw deflate stream, no zlib header/trailer.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    descr = compressor_names.get(compress_type)
    if not descr:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200648
649
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200650class _SharedFile:
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200651 def __init__(self, file, pos, close, lock):
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200652 self._file = file
653 self._pos = pos
654 self._close = close
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200655 self._lock = lock
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200656
657 def read(self, n=-1):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200658 with self._lock:
659 self._file.seek(self._pos)
660 data = self._file.read(n)
661 self._pos = self._file.tell()
662 return data
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200663
664 def close(self):
665 if self._file is not None:
666 fileobj = self._file
667 self._file = None
668 self._close(fileobj)
669
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200670# Provide the tell method for unseekable stream
671class _Tellable:
672 def __init__(self, fp):
673 self.fp = fp
674 self.offset = 0
675
676 def write(self, data):
677 n = self.fp.write(data)
678 self.offset += n
679 return n
680
681 def tell(self):
682 return self.offset
683
684 def flush(self):
685 self.fp.flush()
686
687 def close(self):
688 self.fp.close()
689
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200690
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.

    Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses are required:
    # "-" binds tighter than "<<", so "1 << 31 - 1" would be "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Prepare to read the member described by zipinfo from fileobj.

        fileobj is read via read(n) for the member's compressed bytes.
        decrypter, when given, is a callable applied to every byte read.
        close_fileobj tells close() whether to also close fileobj.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Put the bytes just read back in front of the buffer so the
            # position seen by the caller does not move.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # The request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than requested: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            # Stop after the first non-empty result.
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the member's declared size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read (and decrypt) raw member bytes from the underlying file:
        # at least MIN_READ_SIZE, but never past the end of the member.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = bytes(map(self._decrypter, data))
        return data

    def close(self):
        """Close this object, and the underlying file if it was requested."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000956
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000957
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000958class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000959 """ Class with methods to open, read, write, close, list zip files.
960
Serhiy Storchaka235c5e02013-11-23 15:55:38 +0200961 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +0000962
Fred Drake3d9091e2001-03-26 15:49:24 +0000963 file: Either the path to the file, or a file-like object.
964 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +0200965 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
966 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200967 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
968 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000969 allowZip64: if True ZipFile will create files with ZIP64 extensions when
970 needed, otherwise it will raise an exception when this would
971 be necessary.
972
Fred Drake3d9091e2001-03-26 15:49:24 +0000973 """
Fred Drake484d7352000-10-02 21:14:52 +0000974
Fred Drake90eac282001-02-28 05:29:34 +0000975 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -0800976 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +0000977
Serhiy Storchaka235c5e02013-11-23 15:55:38 +0200978 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +0200979 """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
980 or append 'a'."""
981 if mode not in ('r', 'w', 'x', 'a'):
982 raise RuntimeError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000983
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200984 _check_compression(compression)
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000985
986 self._allowZip64 = allowZip64
987 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000988 self.debug = 0 # Level of printing: 0 through 3
989 self.NameToInfo = {} # Find file info given name
990 self.filelist = [] # List of ZipInfo instances for archive
991 self.compression = compression # Method of compression
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200992 self.mode = mode
Thomas Wouterscf297e42007-02-23 15:07:44 +0000993 self.pwd = None
R David Murrayf50b38a2012-04-12 18:44:58 -0400994 self._comment = b''
Tim Petersa19a1682001-03-29 04:36:09 +0000995
Fred Drake3d9091e2001-03-26 15:49:24 +0000996 # Check if we were passed a file-like object
Guido van Rossum3172c5d2007-10-16 18:12:55 +0000997 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000998 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +0000999 self._filePassed = 0
1000 self.filename = file
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001001 modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
1002 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001003 filemode = modeDict[mode]
1004 while True:
1005 try:
1006 self.fp = io.open(file, filemode)
1007 except OSError:
1008 if filemode in modeDict:
1009 filemode = modeDict[filemode]
1010 continue
Thomas Wouterscf297e42007-02-23 15:07:44 +00001011 raise
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001012 break
Fred Drake3d9091e2001-03-26 15:49:24 +00001013 else:
1014 self._filePassed = 1
1015 self.fp = file
1016 self.filename = getattr(file, 'name', None)
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001017 self._fileRefCnt = 1
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001018 self._lock = threading.RLock()
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001019 self._seekable = True
Tim Petersa19a1682001-03-29 04:36:09 +00001020
Antoine Pitrou17babc52012-11-17 23:50:08 +01001021 try:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001022 if mode == 'r':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001023 self._RealGetContents()
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001024 elif mode in ('w', 'x'):
Georg Brandl268e4d42010-10-14 06:59:45 +00001025 # set the modified flag so central directory gets written
1026 # even if no files are added to the archive
1027 self._didModify = True
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001028 try:
1029 self.start_dir = self.fp.tell()
1030 except (AttributeError, OSError):
1031 self.fp = _Tellable(self.fp)
1032 self.start_dir = 0
1033 self._seekable = False
1034 else:
1035 # Some file-like objects can provide tell() but not seek()
1036 try:
1037 self.fp.seek(self.start_dir)
1038 except (AttributeError, OSError):
1039 self._seekable = False
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001040 elif mode == 'a':
Antoine Pitrou17babc52012-11-17 23:50:08 +01001041 try:
1042 # See if file is a zip file
1043 self._RealGetContents()
1044 # seek to start of directory and overwrite
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001045 self.fp.seek(self.start_dir)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001046 except BadZipFile:
1047 # file is not a zip file, just append
1048 self.fp.seek(0, 2)
1049
1050 # set the modified flag so central directory gets written
1051 # even if no files are added to the archive
1052 self._didModify = True
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001053 self.start_dir = self.fp.tell()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001054 else:
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001055 raise RuntimeError("Mode must be 'r', 'w', 'x', or 'a'")
Antoine Pitrou17babc52012-11-17 23:50:08 +01001056 except:
1057 fp = self.fp
1058 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001059 self._fpclose(fp)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001060 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001061
Ezio Melottifaa6b7f2009-12-30 12:34:59 +00001062 def __enter__(self):
1063 return self
1064
1065 def __exit__(self, type, value, traceback):
1066 self.close()
1067
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001068 def __repr__(self):
1069 result = ['<%s.%s' % (self.__class__.__module__,
1070 self.__class__.__qualname__)]
1071 if self.fp is not None:
1072 if self._filePassed:
1073 result.append(' file=%r' % self.fp)
1074 elif self.filename is not None:
1075 result.append(' filename=%r' % self.filename)
1076 result.append(' mode=%r' % self.mode)
1077 else:
1078 result.append(' [closed]')
1079 result.append('>')
1080 return ''.join(result)
1081
Tim Peters7d3bad62001-04-04 18:56:49 +00001082 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +00001083 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001084 fp = self.fp
Georg Brandl268e4d42010-10-14 06:59:45 +00001085 try:
1086 endrec = _EndRecData(fp)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001087 except OSError:
Georg Brandl4d540882010-10-28 06:42:33 +00001088 raise BadZipFile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001089 if not endrec:
Georg Brandl4d540882010-10-28 06:42:33 +00001090 raise BadZipFile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001091 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001092 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001093 size_cd = endrec[_ECD_SIZE] # bytes in central directory
1094 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
R David Murrayf50b38a2012-04-12 18:44:58 -04001095 self._comment = endrec[_ECD_COMMENT] # archive comment
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001096
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001097 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001098 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +00001099 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1100 # If Zip64 extension structures are present, account for them
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001101 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
1102
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001103 if self.debug > 2:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001104 inferred = concat + offset_cd
1105 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001106 # self.start_dir: Position of start of central directory
1107 self.start_dir = offset_cd + concat
1108 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001109 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001110 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001111 total = 0
1112 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001113 centdir = fp.read(sizeCentralDir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001114 if len(centdir) != sizeCentralDir:
1115 raise BadZipFile("Truncated central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001116 centdir = struct.unpack(structCentralDir, centdir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001117 if centdir[_CD_SIGNATURE] != stringCentralDir:
1118 raise BadZipFile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001119 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001120 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +00001121 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001122 flags = centdir[5]
1123 if flags & 0x800:
1124 # UTF-8 file names extension
1125 filename = filename.decode('utf-8')
1126 else:
1127 # Historical ZIP filename encoding
1128 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001129 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001130 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +00001131 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1132 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001133 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001134 (x.create_version, x.create_system, x.extract_version, x.reserved,
Christian Tismer59202e52013-10-21 03:59:23 +02001135 x.flag_bits, x.compress_type, t, d,
1136 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
Martin v. Löwisd099b562012-05-01 14:08:22 +02001137 if x.extract_version > MAX_EXTRACT_VERSION:
1138 raise NotImplementedError("zip file version %.1f" %
1139 (x.extract_version / 10))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001140 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1141 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +00001142 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001143 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Christian Tismer59202e52013-10-21 03:59:23 +02001144 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001145
1146 x._decodeExtra()
1147 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001148 self.filelist.append(x)
1149 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001150
1151 # update total bytes read from central directory
1152 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1153 + centdir[_CD_EXTRA_FIELD_LENGTH]
1154 + centdir[_CD_COMMENT_LENGTH])
1155
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001156 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001157 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001158
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001159
1160 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001161 """Return a list of file names in the archive."""
Ezio Melotti006917e2012-04-16 21:34:24 -06001162 return [data.filename for data in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001163
1164 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001165 """Return a list of class ZipInfo instances for files in the
1166 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001167 return self.filelist
1168
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001169 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +00001170 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001171 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
1172 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001173 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001174 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001175 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1176 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001177
1178 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001179 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001180 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001181 for zinfo in self.filelist:
1182 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001183 # Read by chunks, to avoid an OverflowError or a
1184 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001185 with self.open(zinfo.filename, "r") as f:
1186 while f.read(chunk_size): # Check CRC-32
1187 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001188 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001189 return zinfo.filename
1190
1191 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +00001192 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001193 info = self.NameToInfo.get(name)
1194 if info is None:
1195 raise KeyError(
1196 'There is no item named %r in the archive' % name)
1197
1198 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001199
Thomas Wouterscf297e42007-02-23 15:07:44 +00001200 def setpassword(self, pwd):
1201 """Set default password for encrypted files."""
R. David Murray8d855d82010-12-21 21:53:37 +00001202 if pwd and not isinstance(pwd, bytes):
1203 raise TypeError("pwd: expected bytes, got %s" % type(pwd))
1204 if pwd:
1205 self.pwd = pwd
1206 else:
1207 self.pwd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001208
R David Murrayf50b38a2012-04-12 18:44:58 -04001209 @property
1210 def comment(self):
1211 """The comment text associated with the ZIP file."""
1212 return self._comment
1213
1214 @comment.setter
1215 def comment(self, comment):
1216 if not isinstance(comment, bytes):
1217 raise TypeError("comment: expected bytes, got %s" % type(comment))
1218 # check for valid comment length
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001219 if len(comment) > ZIP_MAX_COMMENT:
1220 import warnings
1221 warnings.warn('Archive comment is too long; truncating to %d bytes'
1222 % ZIP_MAX_COMMENT, stacklevel=2)
R David Murrayf50b38a2012-04-12 18:44:58 -04001223 comment = comment[:ZIP_MAX_COMMENT]
1224 self._comment = comment
1225 self._didModify = True
1226
Thomas Wouterscf297e42007-02-23 15:07:44 +00001227 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +00001228 """Return file bytes (as a string) for name."""
Benjamin Petersond285bdb2010-10-31 17:57:22 +00001229 with self.open(name, "r", pwd) as fp:
1230 return fp.read()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001231
1232 def open(self, name, mode="r", pwd=None):
1233 """Return file-like object for 'name'."""
1234 if mode not in ("r", "U", "rU"):
Collin Winterce36ad82007-08-30 01:19:48 +00001235 raise RuntimeError('open() requires mode "r", "U", or "rU"')
Serhiy Storchaka6787a382013-11-23 22:12:06 +02001236 if 'U' in mode:
1237 import warnings
1238 warnings.warn("'U' mode is deprecated",
1239 DeprecationWarning, 2)
R. David Murray8d855d82010-12-21 21:53:37 +00001240 if pwd and not isinstance(pwd, bytes):
1241 raise TypeError("pwd: expected bytes, got %s" % type(pwd))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001242 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001243 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02001244 "Attempt to read ZIP archive that was already closed")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001245
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001246 # Make sure we have an info object
1247 if isinstance(name, ZipInfo):
1248 # 'name' is already an info object
1249 zinfo = name
Guido van Rossumd8faa362007-04-27 19:54:29 +00001250 else:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001251 # Get info object for name
1252 zinfo = self.getinfo(name)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001253
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001254 self._fileRefCnt += 1
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001255 zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose, self._lock)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001256 try:
Antoine Pitrou17babc52012-11-17 23:50:08 +01001257 # Skip the file header:
1258 fheader = zef_file.read(sizeFileHeader)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001259 if len(fheader) != sizeFileHeader:
1260 raise BadZipFile("Truncated file header")
1261 fheader = struct.unpack(structFileHeader, fheader)
1262 if fheader[_FH_SIGNATURE] != stringFileHeader:
Antoine Pitrou17babc52012-11-17 23:50:08 +01001263 raise BadZipFile("Bad magic number for file header")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001264
Antoine Pitrou17babc52012-11-17 23:50:08 +01001265 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
1266 if fheader[_FH_EXTRA_FIELD_LENGTH]:
1267 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001268
Antoine Pitrou8572da52012-11-17 23:52:05 +01001269 if zinfo.flag_bits & 0x20:
1270 # Zip 2.7: compressed patched data
1271 raise NotImplementedError("compressed patched data (flag bit 5)")
Martin v. Löwis2a2ce322012-05-01 08:44:08 +02001272
Antoine Pitrou8572da52012-11-17 23:52:05 +01001273 if zinfo.flag_bits & 0x40:
1274 # strong encryption
1275 raise NotImplementedError("strong encryption (flag bit 6)")
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001276
Antoine Pitrou17babc52012-11-17 23:50:08 +01001277 if zinfo.flag_bits & 0x800:
1278 # UTF-8 filename
1279 fname_str = fname.decode("utf-8")
1280 else:
1281 fname_str = fname.decode("cp437")
Georg Brandl5ba11de2011-01-01 10:09:32 +00001282
Antoine Pitrou17babc52012-11-17 23:50:08 +01001283 if fname_str != zinfo.orig_filename:
1284 raise BadZipFile(
1285 'File name in directory %r and header %r differ.'
1286 % (zinfo.orig_filename, fname))
1287
1288 # check for encrypted flag & handle password
1289 is_encrypted = zinfo.flag_bits & 0x1
1290 zd = None
1291 if is_encrypted:
1292 if not pwd:
1293 pwd = self.pwd
1294 if not pwd:
1295 raise RuntimeError("File %s is encrypted, password "
1296 "required for extraction" % name)
1297
1298 zd = _ZipDecrypter(pwd)
1299 # The first 12 bytes in the cypher stream is an encryption header
1300 # used to strengthen the algorithm. The first 11 bytes are
1301 # completely random, while the 12th contains the MSB of the CRC,
1302 # or the MSB of the file time depending on the header type
1303 # and is used to check the correctness of the password.
1304 header = zef_file.read(12)
1305 h = list(map(zd, header[0:12]))
1306 if zinfo.flag_bits & 0x8:
1307 # compare against the file type from extended local headers
1308 check_byte = (zinfo._raw_time >> 8) & 0xff
1309 else:
1310 # compare against the CRC otherwise
1311 check_byte = (zinfo.CRC >> 24) & 0xff
1312 if h[11] != check_byte:
1313 raise RuntimeError("Bad password for file", name)
1314
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001315 return ZipExtFile(zef_file, mode, zinfo, zd, True)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001316 except:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001317 zef_file.close()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001318 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001319
Christian Heimes790c8232008-01-07 21:14:23 +00001320 def extract(self, member, path=None, pwd=None):
1321 """Extract a member from the archive to the current working directory,
1322 using its full name. Its file information is extracted as accurately
1323 as possible. `member' may be a filename or a ZipInfo object. You can
1324 specify a different directory using `path'.
1325 """
1326 if not isinstance(member, ZipInfo):
1327 member = self.getinfo(member)
1328
1329 if path is None:
1330 path = os.getcwd()
1331
1332 return self._extract_member(member, path, pwd)
1333
1334 def extractall(self, path=None, members=None, pwd=None):
1335 """Extract all members from the archive to the current working
1336 directory. `path' specifies a different directory to extract to.
1337 `members' is optional and must be a subset of the list returned
1338 by namelist().
1339 """
1340 if members is None:
1341 members = self.namelist()
1342
1343 for zipinfo in members:
1344 self.extract(zipinfo, path, pwd)
1345
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001346 @classmethod
1347 def _sanitize_windows_name(cls, arcname, pathsep):
1348 """Replace bad characters and remove trailing dots from parts."""
1349 table = cls._windows_illegal_name_trans_table
1350 if not table:
1351 illegal = ':<>|"?*'
1352 table = str.maketrans(illegal, '_' * len(illegal))
1353 cls._windows_illegal_name_trans_table = table
1354 arcname = arcname.translate(table)
1355 # remove trailing dots
1356 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1357 # rejoin, removing empty parts.
1358 arcname = pathsep.join(x for x in arcname if x)
1359 return arcname
1360
Christian Heimes790c8232008-01-07 21:14:23 +00001361 def _extract_member(self, member, targetpath, pwd):
1362 """Extract the ZipInfo object 'member' to a physical
1363 file on the path targetpath.
1364 """
1365 # build the destination pathname, replacing
1366 # forward slashes to platform specific separators.
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001367 arcname = member.filename.replace('/', os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001368
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001369 if os.path.altsep:
1370 arcname = arcname.replace(os.path.altsep, os.path.sep)
1371 # interpret absolute pathname as relative, remove drive letter or
1372 # UNC path, redundant separators, "." and ".." components.
1373 arcname = os.path.splitdrive(arcname)[1]
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001374 invalid_path_parts = ('', os.path.curdir, os.path.pardir)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001375 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001376 if x not in invalid_path_parts)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001377 if os.path.sep == '\\':
Serhiy Storchakae5e64442013-02-02 19:50:59 +02001378 # filter illegal characters on Windows
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001379 arcname = self._sanitize_windows_name(arcname, os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001380
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001381 targetpath = os.path.join(targetpath, arcname)
Christian Heimes790c8232008-01-07 21:14:23 +00001382 targetpath = os.path.normpath(targetpath)
1383
1384 # Create all upper directories if necessary.
1385 upperdirs = os.path.dirname(targetpath)
1386 if upperdirs and not os.path.exists(upperdirs):
1387 os.makedirs(upperdirs)
1388
Martin v. Löwis59e47792009-01-24 14:10:07 +00001389 if member.filename[-1] == '/':
Martin v. Löwis70ccd162009-05-24 19:47:22 +00001390 if not os.path.isdir(targetpath):
1391 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001392 return targetpath
1393
Antoine Pitrou17babc52012-11-17 23:50:08 +01001394 with self.open(member, pwd=pwd) as source, \
1395 open(targetpath, "wb") as target:
1396 shutil.copyfileobj(source, target)
Christian Heimes790c8232008-01-07 21:14:23 +00001397
1398 return targetpath
1399
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001400 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001401 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001402 if zinfo.filename in self.NameToInfo:
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001403 import warnings
1404 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001405 if self.mode not in ('w', 'x', 'a'):
1406 raise RuntimeError("write() requires mode 'w', 'x', or 'a'")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001407 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001408 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02001409 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001410 _check_compression(zinfo.compress_type)
Serhiy Storchakacfbb3942014-09-23 21:34:24 +03001411 if not self._allowZip64:
1412 requires_zip64 = None
1413 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1414 requires_zip64 = "Files count"
1415 elif zinfo.file_size > ZIP64_LIMIT:
1416 requires_zip64 = "Filesize"
1417 elif zinfo.header_offset > ZIP64_LIMIT:
1418 requires_zip64 = "Zipfile size"
1419 if requires_zip64:
1420 raise LargeZipFile(requires_zip64 +
1421 " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001422
1423 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +00001424 """Put the bytes from filename into the archive under the name
1425 arcname."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001426 if not self.fp:
1427 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02001428 "Attempt to write to ZIP archive that was already closed")
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001429
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001430 st = os.stat(filename)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001431 isdir = stat.S_ISDIR(st.st_mode)
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001432 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001433 date_time = mtime[0:6]
1434 # Create ZipInfo instance to store file information
1435 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001436 arcname = filename
1437 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1438 while arcname[0] in (os.sep, os.altsep):
1439 arcname = arcname[1:]
Martin v. Löwis59e47792009-01-24 14:10:07 +00001440 if isdir:
1441 arcname += '/'
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001442 zinfo = ZipInfo(arcname, date_time)
Guido van Rossume2a383d2007-01-15 16:59:06 +00001443 zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001444 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001445 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001446 else:
Tim Peterse1190062001-01-15 03:34:38 +00001447 zinfo.compress_type = compress_type
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001448
1449 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001450 zinfo.flag_bits = 0x00
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001451 with self._lock:
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001452 if self._seekable:
1453 self.fp.seek(self.start_dir)
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001454 zinfo.header_offset = self.fp.tell() # Start of header bytes
1455 if zinfo.compress_type == ZIP_LZMA:
1456 # Compressed data includes an end-of-stream (EOS) marker
1457 zinfo.flag_bits |= 0x02
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001458
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001459 self._writecheck(zinfo)
1460 self._didModify = True
Martin v. Löwis59e47792009-01-24 14:10:07 +00001461
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001462 if isdir:
1463 zinfo.file_size = 0
1464 zinfo.compress_size = 0
1465 zinfo.CRC = 0
1466 zinfo.external_attr |= 0x10 # MS-DOS directory flag
1467 self.filelist.append(zinfo)
1468 self.NameToInfo[zinfo.filename] = zinfo
1469 self.fp.write(zinfo.FileHeader(False))
1470 self.start_dir = self.fp.tell()
1471 return
1472
1473 cmpr = _get_compressor(zinfo.compress_type)
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001474 if not self._seekable:
1475 zinfo.flag_bits |= 0x08
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001476 with open(filename, "rb") as fp:
1477 # Must overwrite CRC and sizes with correct data later
1478 zinfo.CRC = CRC = 0
1479 zinfo.compress_size = compress_size = 0
1480 # Compressed size can be larger than uncompressed size
1481 zip64 = self._allowZip64 and \
1482 zinfo.file_size * 1.05 > ZIP64_LIMIT
1483 self.fp.write(zinfo.FileHeader(zip64))
1484 file_size = 0
1485 while 1:
1486 buf = fp.read(1024 * 8)
1487 if not buf:
1488 break
1489 file_size = file_size + len(buf)
1490 CRC = crc32(buf, CRC) & 0xffffffff
1491 if cmpr:
1492 buf = cmpr.compress(buf)
1493 compress_size = compress_size + len(buf)
1494 self.fp.write(buf)
1495 if cmpr:
1496 buf = cmpr.flush()
1497 compress_size = compress_size + len(buf)
1498 self.fp.write(buf)
1499 zinfo.compress_size = compress_size
1500 else:
1501 zinfo.compress_size = file_size
1502 zinfo.CRC = CRC
1503 zinfo.file_size = file_size
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001504 if zinfo.flag_bits & 0x08:
1505 # Write CRC and file sizes after the file data
1506 fmt = '<LQQ' if zip64 else '<LLL'
1507 self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
1508 zinfo.file_size))
1509 self.start_dir = self.fp.tell()
1510 else:
1511 if not zip64 and self._allowZip64:
1512 if file_size > ZIP64_LIMIT:
1513 raise RuntimeError('File size has increased during compressing')
1514 if compress_size > ZIP64_LIMIT:
1515 raise RuntimeError('Compressed size larger than uncompressed size')
1516 # Seek backwards and write file header (which will now include
1517 # correct CRC and file sizes)
1518 self.start_dir = self.fp.tell() # Preserve current position in file
1519 self.fp.seek(zinfo.header_offset)
1520 self.fp.write(zinfo.FileHeader(zip64))
1521 self.fp.seek(self.start_dir)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001522 self.filelist.append(zinfo)
1523 self.NameToInfo[zinfo.filename] = zinfo
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001524
Ronald Oussorenee5c8852010-02-07 20:24:02 +00001525 def writestr(self, zinfo_or_arcname, data, compress_type=None):
Guido van Rossum85825dc2007-08-27 17:03:28 +00001526 """Write a file into the archive. The contents is 'data', which
1527 may be either a 'str' or a 'bytes' instance; if it is a 'str',
1528 it is encoded as UTF-8 first.
1529 'zinfo_or_arcname' is either a ZipInfo instance or
Just van Rossumb083cb32002-12-12 12:23:32 +00001530 the name of the file in the archive."""
Guido van Rossum85825dc2007-08-27 17:03:28 +00001531 if isinstance(data, str):
1532 data = data.encode("utf-8")
Just van Rossumb083cb32002-12-12 12:23:32 +00001533 if not isinstance(zinfo_or_arcname, ZipInfo):
1534 zinfo = ZipInfo(filename=zinfo_or_arcname,
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001535 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001536 zinfo.compress_type = self.compression
Serhiy Storchaka46a34922014-09-23 22:40:23 +03001537 if zinfo.filename[-1] == '/':
1538 zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x
1539 zinfo.external_attr |= 0x10 # MS-DOS directory flag
1540 else:
1541 zinfo.external_attr = 0o600 << 16 # ?rw-------
Just van Rossumb083cb32002-12-12 12:23:32 +00001542 else:
1543 zinfo = zinfo_or_arcname
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001544
1545 if not self.fp:
1546 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02001547 "Attempt to write to ZIP archive that was already closed")
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001548
Guido van Rossum85825dc2007-08-27 17:03:28 +00001549 zinfo.file_size = len(data) # Uncompressed size
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001550 with self._lock:
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001551 if self._seekable:
Serhiy Storchakaa14f7d22015-01-26 14:01:27 +02001552 self.fp.seek(self.start_dir)
Serhiy Storchakaa14f7d22015-01-26 14:01:27 +02001553 zinfo.header_offset = self.fp.tell() # Start of header data
1554 if compress_type is not None:
1555 zinfo.compress_type = compress_type
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001556 zinfo.header_offset = self.fp.tell() # Start of header data
1557 if compress_type is not None:
1558 zinfo.compress_type = compress_type
1559 if zinfo.compress_type == ZIP_LZMA:
1560 # Compressed data includes an end-of-stream (EOS) marker
1561 zinfo.flag_bits |= 0x02
Ronald Oussorenee5c8852010-02-07 20:24:02 +00001562
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001563 self._writecheck(zinfo)
1564 self._didModify = True
1565 zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum
1566 co = _get_compressor(zinfo.compress_type)
1567 if co:
1568 data = co.compress(data) + co.flush()
1569 zinfo.compress_size = len(data) # Compressed size
1570 else:
1571 zinfo.compress_size = zinfo.file_size
1572 zip64 = zinfo.file_size > ZIP64_LIMIT or \
1573 zinfo.compress_size > ZIP64_LIMIT
1574 if zip64 and not self._allowZip64:
1575 raise LargeZipFile("Filesize would require ZIP64 extensions")
1576 self.fp.write(zinfo.FileHeader(zip64))
1577 self.fp.write(data)
1578 if zinfo.flag_bits & 0x08:
1579 # Write CRC and file sizes after the file data
1580 fmt = '<LQQ' if zip64 else '<LLL'
1581 self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
1582 zinfo.file_size))
1583 self.fp.flush()
1584 self.start_dir = self.fp.tell()
1585 self.filelist.append(zinfo)
1586 self.NameToInfo[zinfo.filename] = zinfo
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001587
1588 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001589 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001590 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001591
1592 def close(self):
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001593 """Close the file, and for mode 'w', 'x' and 'a' write the ending
Fred Drake484d7352000-10-02 21:14:52 +00001594 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001595 if self.fp is None:
1596 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001597
Antoine Pitrou17babc52012-11-17 23:50:08 +01001598 try:
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001599 if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001600 with self._lock:
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001601 if self._seekable:
Serhiy Storchakaa14f7d22015-01-26 14:01:27 +02001602 self.fp.seek(self.start_dir)
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001603 self._write_end_record()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001604 finally:
1605 fp = self.fp
1606 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001607 self._fpclose(fp)
1608
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001609 def _write_end_record(self):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001610 for zinfo in self.filelist: # write central directory
1611 dt = zinfo.date_time
1612 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1613 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1614 extra = []
1615 if zinfo.file_size > ZIP64_LIMIT \
1616 or zinfo.compress_size > ZIP64_LIMIT:
1617 extra.append(zinfo.file_size)
1618 extra.append(zinfo.compress_size)
1619 file_size = 0xffffffff
1620 compress_size = 0xffffffff
1621 else:
1622 file_size = zinfo.file_size
1623 compress_size = zinfo.compress_size
1624
1625 if zinfo.header_offset > ZIP64_LIMIT:
1626 extra.append(zinfo.header_offset)
1627 header_offset = 0xffffffff
1628 else:
1629 header_offset = zinfo.header_offset
1630
1631 extra_data = zinfo.extra
1632 min_version = 0
1633 if extra:
1634 # Append a ZIP64 field to the extra's
1635 extra_data = struct.pack(
1636 '<HH' + 'Q'*len(extra),
1637 1, 8*len(extra), *extra) + extra_data
1638
1639 min_version = ZIP64_VERSION
1640
1641 if zinfo.compress_type == ZIP_BZIP2:
1642 min_version = max(BZIP2_VERSION, min_version)
1643 elif zinfo.compress_type == ZIP_LZMA:
1644 min_version = max(LZMA_VERSION, min_version)
1645
1646 extract_version = max(min_version, zinfo.extract_version)
1647 create_version = max(min_version, zinfo.create_version)
1648 try:
1649 filename, flag_bits = zinfo._encodeFilenameFlags()
1650 centdir = struct.pack(structCentralDir,
1651 stringCentralDir, create_version,
1652 zinfo.create_system, extract_version, zinfo.reserved,
1653 flag_bits, zinfo.compress_type, dostime, dosdate,
1654 zinfo.CRC, compress_size, file_size,
1655 len(filename), len(extra_data), len(zinfo.comment),
1656 0, zinfo.internal_attr, zinfo.external_attr,
1657 header_offset)
1658 except DeprecationWarning:
1659 print((structCentralDir, stringCentralDir, create_version,
1660 zinfo.create_system, extract_version, zinfo.reserved,
1661 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1662 zinfo.CRC, compress_size, file_size,
1663 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1664 0, zinfo.internal_attr, zinfo.external_attr,
1665 header_offset), file=sys.stderr)
1666 raise
1667 self.fp.write(centdir)
1668 self.fp.write(filename)
1669 self.fp.write(extra_data)
1670 self.fp.write(zinfo.comment)
1671
1672 pos2 = self.fp.tell()
1673 # Write end-of-zip-archive record
1674 centDirCount = len(self.filelist)
1675 centDirSize = pos2 - self.start_dir
1676 centDirOffset = self.start_dir
1677 requires_zip64 = None
1678 if centDirCount > ZIP_FILECOUNT_LIMIT:
1679 requires_zip64 = "Files count"
1680 elif centDirOffset > ZIP64_LIMIT:
1681 requires_zip64 = "Central directory offset"
1682 elif centDirSize > ZIP64_LIMIT:
1683 requires_zip64 = "Central directory size"
1684 if requires_zip64:
1685 # Need to write the ZIP64 end-of-archive records
1686 if not self._allowZip64:
1687 raise LargeZipFile(requires_zip64 +
1688 " would require ZIP64 extensions")
1689 zip64endrec = struct.pack(
1690 structEndArchive64, stringEndArchive64,
1691 44, 45, 45, 0, 0, centDirCount, centDirCount,
1692 centDirSize, centDirOffset)
1693 self.fp.write(zip64endrec)
1694
1695 zip64locrec = struct.pack(
1696 structEndArchive64Locator,
1697 stringEndArchive64Locator, 0, pos2, 1)
1698 self.fp.write(zip64locrec)
1699 centDirCount = min(centDirCount, 0xFFFF)
1700 centDirSize = min(centDirSize, 0xFFFFFFFF)
1701 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1702
1703 endrec = struct.pack(structEndArchive, stringEndArchive,
1704 0, 0, centDirCount, centDirCount,
1705 centDirSize, centDirOffset, len(self._comment))
1706 self.fp.write(endrec)
1707 self.fp.write(self._comment)
1708 self.fp.flush()
1709
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001710 def _fpclose(self, fp):
1711 assert self._fileRefCnt > 0
1712 self._fileRefCnt -= 1
1713 if not self._fileRefCnt and not self._filePassed:
1714 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001715
1716
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile does and remember `optimize',
        the optimization level used when choosing or compiling .pyc files
        (-1 selects the legacy "use whatever is present" behaviour)."""
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, the directory and all package
        subdirectories are searched recursively for *.py files and the
        modules are entered into the archive.  If pathname is a plain
        directory, the *.py files directly inside it are entered.
        Otherwise pathname must be a Python *.py file, and that module is
        entered.  The file stored for each module is chosen by
        _get_codename(), which compiles the source when necessary.
        If filterfunc is given, it is called with pathname and with the
        path of every *.py file found in a directory.  When it returns a
        false value, that file or directory is skipped.
        """
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s "%s" skipped by filterfunc' % (label, pathname))
            return

        def add_module(source_path, prefix, verb):
            # Store the compiled form of one *.py file under `prefix'.
            fname, arcname = self._get_codename(source_path[0:-3], prefix)
            if self.debug:
                print(verb, arcname)
            self.write(fname, arcname)

        def rejected(source_path):
            # True when filterfunc vetoes this individual source file.
            if filterfunc and not filterfunc(source_path):
                if self.debug:
                    print('file "%s" skipped by filterfunc' % source_path)
                return True
            return False

        name = os.path.split(pathname)[1]
        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            add_module(pathname, basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if os.path.isfile(initname):
            # This is a package directory, add it
            basename = "%s/%s" % (basename, name) if basename else name
            if self.debug:
                print("Adding package in", pathname, "as", basename)
            add_module(initname, basename, "Adding")
            entries = os.listdir(pathname)
            entries.remove("__init__.py")
            # Add all *.py files and package subdirectories
            for filename in entries:
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if os.path.isdir(path):
                    if os.path.isfile(os.path.join(path, "__init__.py")):
                        # This is a package directory, add it
                        self.writepy(path, basename,
                                     filterfunc=filterfunc)  # Recursive call
                elif ext == ".py":
                    if rejected(path):
                        continue
                    add_module(path, basename, "Adding")
        else:
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                if os.path.splitext(filename)[1] != ".py":
                    continue
                if rejected(path):
                    continue
                add_module(path, basename, "Adding")

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module path without its ".py" suffix, return the file to
        store and the name to store it under, compiling if necessary.
        The archive name is the legacy "<module>.pyc" name (prefixed with
        `basename' when that is non-empty), or "<module>.py" when
        compilation fails and the source itself is stored.
        """
        def _compile(file, optimize=-1):
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)

        def up_to_date(candidate):
            # A compiled file is usable when it exists and is at least as
            # new as the source.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        arcname = file_pyc
        if self._optimize == -1:
            # legacy mode: use whatever file is present, preferring the
            # legacy .pyc, then the __pycache__ files by optimization level.
            # Whatever is picked is stored under the legacy pyc name.
            for candidate in (file_pyc, pycache_opt0,
                              pycache_opt1, pycache_opt2):
                if up_to_date(candidate):
                    fname = candidate
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    level = sys.flags.optimize
                    if level == 0:
                        fname = pycache_opt0
                    elif level == 1:
                        fname = pycache_opt1
                    else:
                        fname = pycache_opt2
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_opt0
            elif self._optimize == 1:
                fname = pycache_opt1
            elif self._optimize == 2:
                fname = pycache_opt2
            else:
                msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                raise ValueError(msg)
            if not up_to_date(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001888
1889
def main(args = None):
    """Command-line entry point: list (-l), test (-t), extract (-e) or
    create (-c) a zipfile.  `args' defaults to sys.argv[1:]; on a usage
    error the usage text is printed and the process exits with status 1."""
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    def usage_exit():
        print(USAGE)
        sys.exit(1)

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_exit()

    command = args[0]
    if command == '-l':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif command == '-t':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif command == '-e':
        if len(args) != 3:
            usage_exit()

        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif command == '-c':
        if len(args) < 3:
            usage_exit()

        def addToZip(zf, path, zippath):
            # Files are stored deflated; directories are stored as an
            # entry (when they have a name) and then walked recursively.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                if zippath:
                    zf.write(path, zippath)
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w') as zf:
            for path in args[2:]:
                zippath = os.path.basename(path)
                if not zippath:
                    # path ended with a separator; use the directory name
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                addToZip(zf, path, zippath)

if __name__ == "__main__":
    main()