blob: d0789b69215f43c19b2410a9ae939cfe508ac3de [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Brett Cannonb57a0852013-06-15 17:32:30 -04009import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +000010import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
Serhiy Storchakaf15e5242015-01-26 13:53:38 +020016import threading
Barry Warsaw28a691b2010-04-17 00:19:56 +000017
Guido van Rossum32abe6f2000-03-31 17:30:02 +000018
19try:
Tim Peterse1190062001-01-15 03:34:38 +000020 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000021 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040022except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000023 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000024 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000025
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020026try:
27 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040028except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020029 bz2 = None
30
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020031try:
32 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040033except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020034 lzma = None
35
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020036__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020037 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Georg Brandl4d540882010-10-28 06:42:33 +000038 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000039
class BadZipFile(Exception):
    """Raised by this module for archives it cannot process.

    For example, it is raised for archives that span multiple disks.
    """
Thomas Wouters0e3f5912006-08-11 14:57:12 +000042
43
class LargeZipFile(Exception):
    """Raised while writing when ZIP64 extensions are needed but disabled.

    Writing the archive would require the ZIP64 extensions, and those
    extensions have not been enabled.
    """
49
Georg Brandl4d540882010-10-28 06:42:33 +000050error = BadZipfile = BadZipFile # Pre-3.2 compatibility names
51
Guido van Rossum32abe6f2000-03-31 17:30:02 +000052
# Sizes above ZIP64_LIMIT make ZipInfo.FileHeader() require the ZIP64
# extension record.
ZIP64_LIMIT = 0x7FFFFFFF
ZIP_FILECOUNT_LIMIT = 0xFFFF
ZIP_MAX_COMMENT = 0xFFFF

# constants for Zip file compression methods
ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2, ZIP_LZMA = 0, 8, 12, 14
# Other ZIP compression methods not supported

# Minimum "version" values written into headers for each feature.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020070
Martin v. Löwisb09b8442008-07-03 14:13:42 +000071# Below are some formats and associated data for reading/writing headers using
72# the struct module. The names and structures of headers/records are those used
73# in the PKWARE description of the ZIP file format:
74# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
75# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000076
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the eight fields unpacked with structEndArchive.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE) = range(8)
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9
95
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)
122
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the twelve fields unpacked with structFileHeader.
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
141
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the ten fields unpacked with structEndArchive64.
(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
163
def _check_zipfile(fp):
    """Return True if _EndRecData() finds an end-of-archive record in *fp*.

    An OSError raised while probing the file is swallowed and reported as
    False.
    """
    try:
        found = bool(_EndRecData(fp))   # truthy: file has correct magic number
    except OSError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000171
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    *filename* may be a path, or any object with a ``read`` attribute, which
    is then used directly as the file object.  An OSError raised while
    opening or checking the file yields False.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves in binary mode.
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except OSError:
        pass
    return verdict
187
def _EndRecData64(fpin, offset, endrec):
    """Update *endrec* from the ZIP64 end-of-archive records, if present.

    *offset* is the position of the regular end-of-central-directory record
    relative to the end of the file.  *endrec* is returned unchanged when no
    valid ZIP64 locator/record precedes that position; otherwise its
    signature, disk and directory fields are overwritten in place.

    Raises BadZipFile for archives that span multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, reloff, disks = struct.unpack(structEndArchive64Locator,
                                                 locator)
    if magic != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks == 1):
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    (magic, sz, create_version, read_version, disk_num, disk_dir,
     dircount, dircount2, dirsize, diroffset) = struct.unpack(
        structEndArchive64, record)
    if magic != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    for index, value in (
            (_ECD_SIGNATURE, magic),
            (_ECD_DISK_NUMBER, disk_num),
            (_ECD_DISK_START, disk_dir),
            (_ECD_ENTRIES_THIS_DISK, dircount),
            (_ECD_ENTRIES_TOTAL, dircount2),
            (_ECD_SIZE, dirsize),
            (_ECD_OFFSET, diroffset)):
        endrec[index] = value
    return endrec
229
230
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list: the fields unpacked from the record, then the
    archive comment, then the file offset at which the record starts.  The
    list is passed through _EndRecData64() before being returned."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: an archive without a comment ends with the record itself.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    if (len(tail) == sizeEndCentDir
            and tail[:4] == stringEndArchive
            and tail[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, tail))
        # Append a blank comment and record start offset
        endrec += [b"", filesize - sizeEndCentDir]
        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the record signature.  The
    # comment is the last item in the ZIP file and may be up to 64K long.
    # It is assumed that the magic number does not appear in the comment.
    search_start = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(search_start, 0)
    window = fpin.read()
    found = window.rfind(stringEndArchive)
    if found < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    raw = window[found:found + sizeEndCentDir]
    if len(raw) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, raw))
    comment_len = endrec[_ECD_COMMENT_SIZE]     # as claimed by the zip file
    comment_at = found + sizeEndCentDir
    endrec.append(window[comment_at:comment_at + comment_len])
    endrec.append(search_start + found)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, search_start + found - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000290
Fred Drake484d7352000-10-02 21:14:52 +0000291
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the entry description with default attribute values.

        *filename* is kept verbatim in ``orig_filename``; ``filename`` holds
        the normalized form (cut at the first null byte, forward slashes).
        *date_time* is a (year, month, day, hour, min, sec) sequence.

        Raises ValueError if the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Return a short description of the entry.

        Reads ``file_size`` and ``compress_size``, which __init__ does not
        set, so they must have been assigned before this is called.
        """
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The high 16 bits are rendered as a file mode, the low 16 bits raw.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.filename[-1:] == '/'
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file (local) header as a bytes object.

        *zip64* forces (True) or forbids (False) the ZIP64 extra record; when
        None it is used only if a size exceeds ZIP64_LIMIT.

        As a side effect ``extract_version`` and ``create_version`` are
        raised to the minimum version the entry requires.

        Raises LargeZipFile if a size exceeds ZIP64_LIMIT while *zip64* is
        false.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # Extra record: header id 1, payload length, then both sizes.
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4,
                                        file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        The name is encoded as ASCII when possible; otherwise as UTF-8 with
        flag bit 0x800 added to the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply ZIP64 sizes/offset found in ``self.extra`` to this entry.

        Records with a header id other than 1 are skipped.  Raises
        RuntimeError when a ZIP64 record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                # NOTE(review): a record whose declared length exceeds the
                # remaining data, or that carries fewer values than the
                # 0xffffffff placeholders require, is not handled here and
                # surfaces as struct.error / IndexError.
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                # Values are consumed in order, one per placeholder field.
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000468
469
Thomas Wouterscf297e42007-02-23 15:07:44 +0000470class _ZipDecrypter:
471 """Class to handle decryption of files stored within a ZIP archive.
472
473 ZIP supports a password-based form of encryption. Even though known
474 plaintext attacks have been found against it, it is still useful
Christian Heimesfdab48e2008-01-20 09:06:41 +0000475 to be able to get data out of such a file.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000476
477 Usage:
478 zd = _ZipDecrypter(mypwd)
479 plain_char = zd(cypher_char)
480 plain_text = map(zd, cypher_text)
481 """
482
483 def _GenerateCRCTable():
484 """Generate a CRC-32 table.
485
486 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
487 internal keys. We noticed that a direct implementation is faster than
488 relying on binascii.crc32().
489 """
490 poly = 0xedb88320
491 table = [0] * 256
492 for i in range(256):
493 crc = i
494 for j in range(8):
495 if crc & 1:
496 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
497 else:
498 crc = ((crc >> 1) & 0x7FFFFFFF)
499 table[i] = crc
500 return table
Daniel Holth9dee3042014-01-02 23:17:21 -0500501 crctable = None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000502
503 def _crc32(self, ch, crc):
504 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000505 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000506
507 def __init__(self, pwd):
Daniel Holth9dee3042014-01-02 23:17:21 -0500508 if _ZipDecrypter.crctable is None:
509 _ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable()
Thomas Wouterscf297e42007-02-23 15:07:44 +0000510 self.key0 = 305419896
511 self.key1 = 591751049
512 self.key2 = 878082192
513 for p in pwd:
514 self._UpdateKeys(p)
515
516 def _UpdateKeys(self, c):
517 self.key0 = self._crc32(c, self.key0)
518 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
519 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000520 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000521
522 def __call__(self, c):
523 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000524 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000525 k = self.key2 | 2
526 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000527 self._UpdateKeys(c)
528 return c
529
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200530
class LZMACompressor:
    """Raw LZMA1 compressor whose output starts with a small header.

    The header (bytes 9 and 4, the 16-bit length of the filter properties,
    then the properties themselves) is emitted by the first call to
    compress() or flush().
    """

    def __init__(self):
        # The underlying compressor is created lazily by _init().
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*, prefixing the header on first use."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream, prefixing the header if nothing was written."""
        header = self._init() if self._comp is None else b''
        return header + self._comp.flush()
552
553
class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by a properties header.

    Input is buffered until the 4-byte header and the filter properties it
    announces have arrived along with at least one further byte; only then
    is the underlying decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Return decompressed bytes for *data* (b'' while still buffering)."""
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            # Bytes 2-3 hold the little-endian size of the properties.
            (psize,) = struct.unpack('<H', pending[2:4])
            body_start = 4 + psize
            if len(pending) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[body_start:]
            del self._unconsumed

        output = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return output
580
581
# Display names for ZIP compression method numbers; used when describing
# an entry or reporting an unsupported method.
compressor_names = {
    0: 'store', 1: 'shrink',
    2: 'reduce', 3: 'reduce', 4: 'reduce', 5: 'reduce',
    6: 'implode', 7: 'tokenize',
    8: 'deflate', 9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse', 19: 'lz77',
    97: 'wavpack', 98: 'ppmd',
}
601
def _check_compression(compression):
    """Raise RuntimeError unless *compression* is a usable method.

    ZIP_STORED is always accepted.  The other known methods are accepted
    only when their optional module was imported; anything else is rejected.
    """
    if compression == ZIP_STORED:
        return
    requirements = (
        (ZIP_DEFLATED, zlib,
         "Compression requires the (missing) zlib module"),
        (ZIP_BZIP2, bz2,
         "Compression requires the (missing) bz2 module"),
        (ZIP_LZMA, lzma,
         "Compression requires the (missing) lzma module"),
    )
    for method, module, message in requirements:
        if compression == method:
            if not module:
                raise RuntimeError(message)
            return
    raise RuntimeError("That compression method is not supported")
619
620
def _get_compressor(compress_type):
    """Return a new compressor object for *compress_type*.

    Returns None for any type other than deflate, bzip2 and lzma.
    """
    if compress_type == ZIP_DEFLATED:
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    if compress_type == ZIP_LZMA:
        return LZMACompressor()
    return None
631
632
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    Returns None for ZIP_STORED.  Raises NotImplementedError for any type
    other than stored, deflate, bzip2 and lzma, naming the method when it
    appears in compressor_names.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    descr = compressor_names.get(compress_type)
    if not descr:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200648
649
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200650class _SharedFile:
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200651 def __init__(self, file, pos, close, lock):
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200652 self._file = file
653 self._pos = pos
654 self._close = close
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200655 self._lock = lock
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200656
657 def read(self, n=-1):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +0200658 with self._lock:
659 self._file.seek(self._pos)
660 data = self._file.read(n)
661 self._pos = self._file.tell()
662 return data
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200663
664 def close(self):
665 if self._file is not None:
666 fileobj = self._file
667 self._file = None
668 self._close(fileobj)
669
670
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.

    Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses matter: '-' binds
    # tighter than '<<', so a bare "1 << 31 - 1" evaluates to 1 << 30.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Set up reading of the member described by zipinfo from fileobj.

        decrypter, if not None, is called on every byte read from fileobj.
        When close_fileobj is true, close() also closes fileobj.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Put the bytes just read back in front of the buffer so the
            # logical position is unchanged.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: this object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # The request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # More came back than requested: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        """Fold newdata into the running CRC and verify it once at EOF.

        Raises BadZipFile if the final CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            # Loop only until the first non-empty result is obtained.
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        """Return the next piece of decrypted, decompressed member data."""
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        """Read and decrypt raw (still compressed) bytes from the file.

        Raises EOFError if the file yields no data while compressed bytes
        are still expected.
        """
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = bytes(map(self._decrypter, data))
        return data

    def close(self):
        """Close this object and, if requested at creation, the source file."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000936
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000937
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000938class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000939 """ Class with methods to open, read, write, close, list zip files.
940
Serhiy Storchaka235c5e02013-11-23 15:55:38 +0200941 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +0000942
Fred Drake3d9091e2001-03-26 15:49:24 +0000943 file: Either the path to the file, or a file-like object.
944 If it is a path, the file will be opened and closed by ZipFile.
945 mode: The mode can be either read "r", write "w" or append "a".
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200946 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
947 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000948 allowZip64: if True ZipFile will create files with ZIP64 extensions when
949 needed, otherwise it will raise an exception when this would
950 be necessary.
951
Fred Drake3d9091e2001-03-26 15:49:24 +0000952 """
Fred Drake484d7352000-10-02 21:14:52 +0000953
Fred Drake90eac282001-02-28 05:29:34 +0000954 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -0800955 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +0000956
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
    """Open the ZIP file with mode read "r", write "w" or append "a"."""
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0              # Level of printing: 0 through 3
    self.NameToInfo = {}        # Find file info given name
    self.filelist = []          # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = mode
    self.pwd = None
    self._comment = b''

    if not isinstance(file, str):
        # A file-like object was handed in; use it directly.
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    else:
        # A path was given: open it, walking the fallback chain of open
        # modes whenever the current one fails with OSError.
        self._filePassed = 0
        self.filename = file
        fallback = {'r' : 'rb', 'w': 'w+b', 'a' : 'r+b',
                    'r+b': 'w+b', 'w+b': 'wb'}
        open_mode = fallback[mode]
        while True:
            try:
                handle = io.open(file, open_mode)
            except OSError:
                if open_mode not in fallback:
                    raise
                open_mode = fallback[open_mode]
            else:
                self.fp = handle
                break
    self._fileRefCnt = 1
    self._lock = threading.RLock()

    try:
        if mode == 'r':
            self._RealGetContents()
        elif mode == 'w':
            # Mark as modified so the central directory gets written
            # even if no files are added to the archive.
            self._didModify = True
            self.start_dir = 0
        elif mode == 'a':
            try:
                # Existing zip file: position at the start of its
                # central directory so new data overwrites it.
                self._RealGetContents()
                self.fp.seek(self.start_dir, 0)
            except BadZipFile:
                # Not a zip file: append at the very end.
                self.fp.seek(0, 2)

                # Mark as modified so the central directory gets written
                # even if no files are added to the archive.
                self._didModify = True
                self.start_dir = self.fp.tell()
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')
    except:
        # Release the file before propagating whatever went wrong.
        failed_fp = self.fp
        self.fp = None
        self._fpclose(failed_fp)
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001027
def __enter__(self):
    """Enter a 'with' block; the archive object itself is the target."""
    archive = self
    return archive
1030
def __exit__(self, type, value, traceback):
    """Leave a 'with' block by closing the archive; exceptions propagate."""
    self.close()
    return None
1033
def __repr__(self):
    """Describe the archive: its file or filename and mode, or [closed]."""
    cls = self.__class__
    parts = ['<%s.%s' % (cls.__module__, cls.__qualname__)]
    if self.fp is None:
        parts.append(' [closed]')
    else:
        if self._filePassed:
            parts.append(' file=%r' % self.fp)
        elif self.filename is not None:
            parts.append(' filename=%r' % self.filename)
        parts.append(' mode=%r' % self.mode)
    parts.append('>')
    return ''.join(parts)
1047
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Fills self.filelist and self.NameToInfo with one ZipInfo per central
    directory entry and records self.start_dir and self._comment.
    Raises BadZipFile when the end record or a directory entry is invalid.
    """
    archive = self.fp
    try:
        endrec = _EndRecData(archive)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    archive.seek(self.start_dir, 0)
    # Parse the directory from an in-memory copy of its bytes.
    directory = io.BytesIO(archive.read(size_cd))
    consumed = 0
    while consumed < size_cd:
        raw_entry = directory.read(sizeCentralDir)
        if len(raw_entry) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, raw_entry)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        name_len = centdir[_CD_FILENAME_LENGTH]
        extra_len = centdir[_CD_EXTRA_FIELD_LENGTH]
        comment_len = centdir[_CD_COMMENT_LENGTH]

        raw_name = directory.read(name_len)
        flags = centdir[5]
        # Flag bit 0x800 marks a UTF-8 name; otherwise use the
        # historical ZIP filename encoding.
        encoding = 'utf-8' if flags & 0x800 else 'cp437'
        # Create ZipInfo instance to store file information
        x = ZipInfo(raw_name.decode(encoding))
        x.extra = directory.read(extra_len)
        x.comment = directory.read(comment_len)
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        x._decodeExtra()
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        consumed += sizeCentralDir + name_len + extra_len + comment_len

        if self.debug > 2:
            print("total", consumed)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001124
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001125
def namelist(self):
    """Return a new list holding the file name of every archive member."""
    names = []
    for info in self.filelist:
        names.append(info.filename)
    return names
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001129
def infolist(self):
    """Return the list of ZipInfo instances for files in the archive.

    The archive's own list object is returned, not a copy.
    """
    entries = self.filelist
    return entries
1134
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    Output goes to 'file', which is passed straight through to print().
    """
    heading = ("File Name", "Modified ", "Size")
    print("%-46s %19s %12s" % heading, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = (entry.filename, stamp, entry.file_size)
        print("%-46s %s %12d" % row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001143
1144 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001145 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001146 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001147 for zinfo in self.filelist:
1148 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001149 # Read by chunks, to avoid an OverflowError or a
1150 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001151 with self.open(zinfo.filename, "r") as f:
1152 while f.read(chunk_size): # Check CRC-32
1153 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001154 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001155 return zinfo.filename
1156
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when the archive has no member with that name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001165
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A falsy 'pwd' clears the default; a non-empty value that is not
    bytes raises TypeError.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001174
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Only bytes are accepted as an archive comment.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment))
    # Over-long comments are truncated, with a warning aimed at the caller.
    limit = ZIP_MAX_COMMENT
    if len(comment) > limit:
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT, stacklevel=2)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    # Changing the comment counts as a modification of the archive.
    self._didModify = True
1192
def read(self, name, pwd=None):
    """Return the full contents of member 'name', as given by its read()."""
    with self.open(name, "r", pwd) as member:
        payload = member.read()
    return payload
Guido van Rossumd8faa362007-04-27 19:54:29 +00001197
1198 def open(self, name, mode="r", pwd=None):
1199 """Return file-like object for 'name'."""
1200 if mode not in ("r", "U", "rU"):
Collin Winterce36ad82007-08-30 01:19:48 +00001201 raise RuntimeError('open() requires mode "r", "U", or "rU"')
Serhiy Storchaka6787a382013-11-23 22:12:06 +02001202 if 'U' in mode:
1203 import warnings
1204 warnings.warn("'U' mode is deprecated",
1205 DeprecationWarning, 2)
R. David Murray8d855d82010-12-21 21:53:37 +00001206 if pwd and not isinstance(pwd, bytes):
1207 raise TypeError("pwd: expected bytes, got %s" % type(pwd))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001208 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001209 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02001210 "Attempt to read ZIP archive that was already closed")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001211
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001212 # Make sure we have an info object
1213 if isinstance(name, ZipInfo):
1214 # 'name' is already an info object
1215 zinfo = name
Guido van Rossumd8faa362007-04-27 19:54:29 +00001216 else:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001217 # Get info object for name
1218 zinfo = self.getinfo(name)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001219
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001220 self._fileRefCnt += 1
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001221 zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose, self._lock)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001222 try:
Antoine Pitrou17babc52012-11-17 23:50:08 +01001223 # Skip the file header:
1224 fheader = zef_file.read(sizeFileHeader)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001225 if len(fheader) != sizeFileHeader:
1226 raise BadZipFile("Truncated file header")
1227 fheader = struct.unpack(structFileHeader, fheader)
1228 if fheader[_FH_SIGNATURE] != stringFileHeader:
Antoine Pitrou17babc52012-11-17 23:50:08 +01001229 raise BadZipFile("Bad magic number for file header")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001230
Antoine Pitrou17babc52012-11-17 23:50:08 +01001231 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
1232 if fheader[_FH_EXTRA_FIELD_LENGTH]:
1233 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001234
Antoine Pitrou8572da52012-11-17 23:52:05 +01001235 if zinfo.flag_bits & 0x20:
1236 # Zip 2.7: compressed patched data
1237 raise NotImplementedError("compressed patched data (flag bit 5)")
Martin v. Löwis2a2ce322012-05-01 08:44:08 +02001238
Antoine Pitrou8572da52012-11-17 23:52:05 +01001239 if zinfo.flag_bits & 0x40:
1240 # strong encryption
1241 raise NotImplementedError("strong encryption (flag bit 6)")
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001242
Antoine Pitrou17babc52012-11-17 23:50:08 +01001243 if zinfo.flag_bits & 0x800:
1244 # UTF-8 filename
1245 fname_str = fname.decode("utf-8")
1246 else:
1247 fname_str = fname.decode("cp437")
Georg Brandl5ba11de2011-01-01 10:09:32 +00001248
Antoine Pitrou17babc52012-11-17 23:50:08 +01001249 if fname_str != zinfo.orig_filename:
1250 raise BadZipFile(
1251 'File name in directory %r and header %r differ.'
1252 % (zinfo.orig_filename, fname))
1253
1254 # check for encrypted flag & handle password
1255 is_encrypted = zinfo.flag_bits & 0x1
1256 zd = None
1257 if is_encrypted:
1258 if not pwd:
1259 pwd = self.pwd
1260 if not pwd:
1261 raise RuntimeError("File %s is encrypted, password "
1262 "required for extraction" % name)
1263
1264 zd = _ZipDecrypter(pwd)
1265 # The first 12 bytes in the cypher stream is an encryption header
1266 # used to strengthen the algorithm. The first 11 bytes are
1267 # completely random, while the 12th contains the MSB of the CRC,
1268 # or the MSB of the file time depending on the header type
1269 # and is used to check the correctness of the password.
1270 header = zef_file.read(12)
1271 h = list(map(zd, header[0:12]))
1272 if zinfo.flag_bits & 0x8:
1273 # compare against the file type from extended local headers
1274 check_byte = (zinfo._raw_time >> 8) & 0xff
1275 else:
1276 # compare against the CRC otherwise
1277 check_byte = (zinfo.CRC >> 24) & 0xff
1278 if h[11] != check_byte:
1279 raise RuntimeError("Bad password for file", name)
1280
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001281 return ZipExtFile(zef_file, mode, zinfo, zd, True)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001282 except:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001283 zef_file.close()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001284 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001285
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a ZipInfo object. You can
       specify a different directory using `path'.

       Returns whatever _extract_member() returns for the member.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    target_dir = os.getcwd() if path is None else path
    return self._extract_member(zinfo, target_dir, pwd)
1299
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
       directory. `path' specifies a different directory to extract to.
       `members' is optional and must be a subset of the list returned
       by namelist().
    """
    targets = self.namelist() if members is None else members
    for item in targets:
        self.extract(item, path, pwd)
1311
@classmethod
def _sanitize_windows_name(cls, arcname, pathsep):
    """Replace bad characters and remove trailing dots from parts."""
    # Build the translation table on first use and cache it on the class.
    if not cls._windows_illegal_name_trans_table:
        bad_chars = ':<>|"?*'
        cls._windows_illegal_name_trans_table = str.maketrans(
            bad_chars, '_' * len(bad_chars))
    cleaned = arcname.translate(cls._windows_illegal_name_trans_table)
    kept = []
    for piece in cleaned.split(pathsep):
        # remove trailing dots
        piece = piece.rstrip('.')
        # drop parts that end up empty
        if piece:
            kept.append(piece)
    return pathsep.join(kept)
1326
Christian Heimes790c8232008-01-07 21:14:23 +00001327 def _extract_member(self, member, targetpath, pwd):
1328 """Extract the ZipInfo object 'member' to a physical
1329 file on the path targetpath.
1330 """
1331 # build the destination pathname, replacing
1332 # forward slashes to platform specific separators.
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001333 arcname = member.filename.replace('/', os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001334
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001335 if os.path.altsep:
1336 arcname = arcname.replace(os.path.altsep, os.path.sep)
1337 # interpret absolute pathname as relative, remove drive letter or
1338 # UNC path, redundant separators, "." and ".." components.
1339 arcname = os.path.splitdrive(arcname)[1]
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001340 invalid_path_parts = ('', os.path.curdir, os.path.pardir)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001341 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001342 if x not in invalid_path_parts)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001343 if os.path.sep == '\\':
Serhiy Storchakae5e64442013-02-02 19:50:59 +02001344 # filter illegal characters on Windows
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001345 arcname = self._sanitize_windows_name(arcname, os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001346
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001347 targetpath = os.path.join(targetpath, arcname)
Christian Heimes790c8232008-01-07 21:14:23 +00001348 targetpath = os.path.normpath(targetpath)
1349
1350 # Create all upper directories if necessary.
1351 upperdirs = os.path.dirname(targetpath)
1352 if upperdirs and not os.path.exists(upperdirs):
1353 os.makedirs(upperdirs)
1354
Martin v. Löwis59e47792009-01-24 14:10:07 +00001355 if member.filename[-1] == '/':
Martin v. Löwis70ccd162009-05-24 19:47:22 +00001356 if not os.path.isdir(targetpath):
1357 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001358 return targetpath
1359
Antoine Pitrou17babc52012-11-17 23:50:08 +01001360 with self.open(member, pwd=pwd) as source, \
1361 open(targetpath, "wb") as target:
1362 shutil.copyfileobj(source, target)
Christian Heimes790c8232008-01-07 21:14:23 +00001363
1364 return targetpath
1365
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001366 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001367 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001368 if zinfo.filename in self.NameToInfo:
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001369 import warnings
1370 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001371 if self.mode not in ("w", "a"):
Collin Winterce36ad82007-08-30 01:19:48 +00001372 raise RuntimeError('write() requires mode "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001373 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001374 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02001375 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001376 _check_compression(zinfo.compress_type)
Serhiy Storchakacfbb3942014-09-23 21:34:24 +03001377 if not self._allowZip64:
1378 requires_zip64 = None
1379 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1380 requires_zip64 = "Files count"
1381 elif zinfo.file_size > ZIP64_LIMIT:
1382 requires_zip64 = "Filesize"
1383 elif zinfo.header_offset > ZIP64_LIMIT:
1384 requires_zip64 = "Zipfile size"
1385 if requires_zip64:
1386 raise LargeZipFile(requires_zip64 +
1387 " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001388
def write(self, filename, arcname=None, compress_type=None):
    """Add the file or directory `filename' to the archive.

    The entry is stored under `arcname' (default: `filename'), with the
    drive and leading separators removed.  `compress_type' overrides the
    archive's default compression for this entry.  Raises RuntimeError
    if the archive has already been closed.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    date_time = time.localtime(st.st_mtime)[0:6]

    # Work out the name used inside the archive.
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'

    # Create ZipInfo instance to store file information
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)
    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00

    with self._lock:
        self.fp.seek(self.start_dir, 0)
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            # A directory entry consists of a header only.
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            self.fp.write(zinfo.FileHeader(False))
            self.start_dir = self.fp.tell()
            return

        compressor = _get_compressor(zinfo.compress_type)
        with open(filename, "rb") as src:
            # Placeholder values; the header is rewritten with the real
            # CRC and sizes once the data has been streamed.
            zinfo.CRC = crc = 0
            zinfo.compress_size = packed_size = 0
            # Compressed size can be larger than uncompressed size
            zip64 = self._allowZip64 and \
                zinfo.file_size * 1.05 > ZIP64_LIMIT
            self.fp.write(zinfo.FileHeader(zip64))
            raw_size = 0
            while True:
                chunk = src.read(1024 * 8)
                if not chunk:
                    break
                raw_size += len(chunk)
                crc = crc32(chunk, crc) & 0xffffffff
                if compressor:
                    chunk = compressor.compress(chunk)
                    packed_size += len(chunk)
                self.fp.write(chunk)
        if compressor:
            tail = compressor.flush()
            packed_size += len(tail)
            self.fp.write(tail)
            zinfo.compress_size = packed_size
        else:
            zinfo.compress_size = raw_size
        zinfo.CRC = crc
        zinfo.file_size = raw_size
        if not zip64 and self._allowZip64:
            if raw_size > ZIP64_LIMIT:
                raise RuntimeError('File size has increased during compressing')
            if packed_size > ZIP64_LIMIT:
                raise RuntimeError('Compressed size larger than uncompressed size')
        # Seek backwards and write file header (which will now include
        # correct CRC and file sizes)
        self.start_dir = self.fp.tell()     # Preserve current position in file
        self.fp.seek(zinfo.header_offset, 0)
        self.fp.write(zinfo.FileHeader(zip64))
        self.fp.seek(self.start_dir, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001480
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write `data' into the archive as one entry.

    `data' may be a 'str' or a 'bytes' instance; a 'str' is encoded as
    UTF-8 first.  `zinfo_or_arcname' is either a ZipInfo instance or the
    name of the entry in the archive.  `compress_type', when given,
    overrides the compression stored in the ZipInfo.  Raises RuntimeError
    if the archive has already been closed.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")

    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        if zinfo.filename[-1] == '/':
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)            # Uncompressed size
    with self._lock:
        self.fp.seek(self.start_dir, 0)
        zinfo.header_offset = self.fp.tell()    # Start of header data
        if compress_type is not None:
            zinfo.compress_type = compress_type
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True
        zinfo.CRC = crc32(data) & 0xffffffff       # CRC-32 checksum
        compressor = _get_compressor(zinfo.compress_type)
        if compressor:
            data = compressor.compress(data) + compressor.flush()
            zinfo.compress_size = len(data)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        zip64 = (zinfo.file_size > ZIP64_LIMIT or
                 zinfo.compress_size > ZIP64_LIMIT)
        if zip64 and not self._allowZip64:
            raise LargeZipFile("Filesize would require ZIP64 extensions")
        self.fp.write(zinfo.FileHeader(zip64))
        self.fp.write(data)
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            if zip64:
                fmt = '<LQQ'
            else:
                fmt = '<LLL'
            descriptor = struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
                                     zinfo.file_size)
            self.fp.write(descriptor)
        self.fp.flush()
        self.start_dir = self.fp.tell()
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001539
1540 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001541 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001542 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001543
def close(self):
    """Close the archive.

    For an archive opened in mode "w" or "a" that has been modified, the
    ending records are written first.  The underlying file is released
    through _fpclose() even when writing those records fails.  Calling
    close() on an already closed archive does nothing.
    """
    if self.fp is None:
        return

    try:
        needs_end_record = self.mode in ("w", "a") and self._didModify
        if needs_end_record:
            with self._lock:
                self.fp.seek(self.start_dir, 0)
                self._write_end_record()
    finally:
        # Detach the file object before releasing it.
        handle, self.fp = self.fp, None
        self._fpclose(handle)
1559
def _write_end_record(self):
    """Write the central directory and the end-of-archive records.

    Starting at self.start_dir, one central directory entry is written
    for every ZipInfo in self.filelist.  This is followed by the ZIP64
    end records when a limit is exceeded, and finally by the regular
    end-of-archive record and the archive comment.  Raises LargeZipFile
    if ZIP64 records are required but self._allowZip64 is false.
    """
    self.fp.seek(self.start_dir, 0)
    for zinfo in self.filelist:         # write central directory
        stamp = zinfo.date_time
        dosdate = (stamp[0] - 1980) << 9 | stamp[1] << 5 | stamp[2]
        dostime = stamp[3] << 11 | stamp[4] << 5 | (stamp[5] // 2)

        # Values that do not fit the fixed fields are collected for a
        # ZIP64 extra field; the fixed field then holds 0xffffffff.
        zip64_values = []
        if (zinfo.file_size > ZIP64_LIMIT
                or zinfo.compress_size > ZIP64_LIMIT):
            zip64_values.append(zinfo.file_size)
            zip64_values.append(zinfo.compress_size)
            file_size = compress_size = 0xffffffff
        else:
            file_size = zinfo.file_size
            compress_size = zinfo.compress_size

        if zinfo.header_offset > ZIP64_LIMIT:
            zip64_values.append(zinfo.header_offset)
            header_offset = 0xffffffff
        else:
            header_offset = zinfo.header_offset

        extra_data = zinfo.extra
        min_version = 0
        if zip64_values:
            # Append a ZIP64 field to the extra's
            zip64_field = struct.pack(
                '<HH' + 'Q'*len(zip64_values),
                1, 8*len(zip64_values), *zip64_values)
            extra_data = zip64_field + extra_data
            min_version = ZIP64_VERSION

        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(
                structCentralDir,
                stringCentralDir, create_version,
                zinfo.create_system, extract_version, zinfo.reserved,
                flag_bits, zinfo.compress_type, dostime, dosdate,
                zinfo.CRC, compress_size, file_size,
                len(filename), len(extra_data), len(zinfo.comment),
                0, zinfo.internal_attr, zinfo.external_attr,
                header_offset)
        except DeprecationWarning:
            # Dump the values that were being packed, then re-raise.
            print((structCentralDir, stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset), file=sys.stderr)
            raise
        for piece in (centdir, filename, extra_data, zinfo.comment):
            self.fp.write(piece)

    dir_end = self.fp.tell()
    # Write end-of-zip-archive record
    entry_count = len(self.filelist)
    dir_size = dir_end - self.start_dir
    dir_offset = self.start_dir
    if entry_count > ZIP_FILECOUNT_LIMIT:
        zip64_reason = "Files count"
    elif dir_offset > ZIP64_LIMIT:
        zip64_reason = "Central directory offset"
    elif dir_size > ZIP64_LIMIT:
        zip64_reason = "Central directory size"
    else:
        zip64_reason = None
    if zip64_reason:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(zip64_reason +
                               " would require ZIP64 extensions")
        self.fp.write(struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, entry_count, entry_count,
            dir_size, dir_offset))

        self.fp.write(struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, dir_end, 1))
        # Clamp the values stored in the regular end record.
        entry_count = min(entry_count, 0xFFFF)
        dir_size = min(dir_size, 0xFFFFFFFF)
        dir_offset = min(dir_offset, 0xFFFFFFFF)

    endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, entry_count, entry_count,
                         dir_size, dir_offset, len(self._comment))
    self.fp.write(endrec)
    self.fp.write(self._comment)
    self.fp.flush()
1661
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001662 def _fpclose(self, fp):
1663 assert self._fileRefCnt > 0
1664 self._fileRefCnt -= 1
1665 if not self._fileRefCnt and not self._filePassed:
1666 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001667
1668
class PyZipFile(ZipFile):
    """ZipFile subclass that adds Python modules and packages to archives."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember `optimize'.

        `optimize' selects which byte-code file _get_codename() looks for
        or compiles; -1 means "use whatever file is present".
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add the Python code found at "pathname" to the ZIP archive.

        If pathname is a package directory (it contains __init__.py), the
        package and its sub-packages are added recursively.  If it is a
        plain directory, the *.py files directly inside it are added.
        Otherwise pathname must name a *.py file, or RuntimeError is
        raised.  Each module is stored under the file chosen by
        _get_codename(), prefixed with basename.

        If filterfunc is given it is called with each path; a false
        result skips that file or directory.
        """
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s "%s" skipped by filterfunc' % (label, pathname))
            return
        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if ext != ".py":
                    continue
                if filterfunc and not filterfunc(path):
                    if self.debug:
                        print('file "%s" skipped by filterfunc' % path)
                    continue
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
            return

        # This is a package directory, add it
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)
        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in entries:
            path = os.path.join(pathname, filename)
            ext = os.path.splitext(filename)[1]
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename,
                                 filterfunc=filterfunc)  # Recursive call
            elif ext == ".py":
                if filterfunc and not filterfunc(path):
                    if self.debug:
                        print('file "%s" skipped by filterfunc' % path)
                    continue
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        pathname is a module path without the ".py" suffix.  filename is
        the file on disk to store (an up-to-date byte-code file, a freshly
        compiled one, or the .py source when compiling fails) and
        archivename is its name inside the archive, prefixed with
        basename when that is non-empty.
        """
        def _compile(file, optimize=-1):
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = importlib.util.cache_from_source(file_py, True)
        pycache_pyo = importlib.util.cache_from_source(file_py, False)

        def _up_to_date(candidate):
            # True when candidate exists and is at least as new as the source.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present.  Candidates are
            # (file on disk, name used in the archive); __pycache__ files
            # are stored under the legacy pyc/pyo name.
            candidates = (
                (file_pyo, file_pyo),
                (file_pyc, file_pyc),
                (pycache_pyc, file_pyc),
                (pycache_pyo, file_pyo),
            )
            for fname, arcname in candidates:
                if _up_to_date(fname):
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    fname = (pycache_pyc if __debug__ else pycache_pyo)
                    arcname = (file_pyc if __debug__ else file_pyo)
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname, arcname = pycache_pyc, file_pyc
            else:
                fname, arcname = pycache_pyo, file_pyo
            if not _up_to_date(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001827
1828
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

    `args' defaults to sys.argv[1:].  On a missing or unknown option, or a
    wrong number of arguments, the usage text is printed and the process
    exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    def usage_exit():
        print(USAGE)
        sys.exit(1)

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_exit()

    option = args[0]
    if option == '-l':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif option == '-t':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif option == '-e':
        if len(args) != 3:
            usage_exit()

        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif option == '-c':
        if len(args) < 3:
            usage_exit()

        def add_to_zip(zf, path, zippath):
            # Files are deflated; directories are stored and then walked.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                if zippath:
                    zf.write(path, zippath)
                for nm in os.listdir(path):
                    add_to_zip(zf,
                               os.path.join(path, nm),
                               os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w') as zf:
            for path in args[2:]:
                zippath = os.path.basename(path)
                if not zippath:
                    # path ended with a separator: use the last component.
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                add_to_zip(zf, path, zippath)

if __name__ == "__main__":
    main()