blob: 55afa0850536b1afee9ee19c9654745386d63096 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Brett Cannonb57a0852013-06-15 17:32:30 -04009import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +000010import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
Serhiy Storchakaf15e5242015-01-26 13:53:38 +020016import threading
Barry Warsaw28a691b2010-04-17 00:19:56 +000017
Guido van Rossum32abe6f2000-03-31 17:30:02 +000018
19try:
Tim Peterse1190062001-01-15 03:34:38 +000020 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000021 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040022except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000023 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000024 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000025
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020026try:
27 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040028except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020029 bz2 = None
30
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020031try:
32 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040033except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020034 lzma = None
35
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020036__all__ = ["BadZipFile", "BadZipfile", "error",
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020037 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
Georg Brandl4d540882010-10-28 06:42:33 +000038 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000039
class BadZipFile(Exception):
    """Exception raised by this module for ZIP archives it cannot handle."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +000042
43
class LargeZipFile(Exception):
    """
    Raised while writing a zipfile when the archive would need the ZIP64
    extensions but those extensions have not been enabled.
    """
49
# Pre-3.2 compatibility names: both are plain aliases of BadZipFile.
BadZipfile = BadZipFile
error = BadZipFile
51
Guido van Rossum32abe6f2000-03-31 17:30:02 +000052
# Size/count thresholds used by this module.
ZIP64_LIMIT = 0x7FFFFFFF            # (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 0xFFFF        # (1 << 16) - 1
ZIP_MAX_COMMENT = 0xFFFF            # (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2, ZIP_LZMA = 0, 8, 12, 14
# Other ZIP compression methods not supported

# "version needed to extract" values written into headers
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions within the unpacked record.  The last two (_ECD_COMMENT and
# _ECD_LOCATION) are not part of the structure as defined in the spec; they
# are appended internally by this module as a convenience.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE,
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
163
def _check_zipfile(fp):
    """Return True when _EndRecData() finds an end-of-archive record in *fp*.

    An OSError raised while probing the stream is treated the same as
    "no record found", so the result is False in that case.
    """
    try:
        found = bool(_EndRecData(fp))   # file has correct magic number
    except OSError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000171
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    *filename* may be a path, or an object with a ``read`` attribute (a file
    or file-like object), which is then probed directly instead of being
    opened.  Any OSError along the way leaves the answer at whatever had
    been determined so far (False unless the check already succeeded).
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves in binary mode for the check.
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except OSError:
        pass
    return verdict
187
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    *offset* is passed to seek() relative to the end of the file (whence=2);
    the ZIP64 locator is expected immediately before that position and the
    ZIP64 end-of-central-directory record immediately before the locator.
    *endrec* is returned unchanged whenever either structure is missing,
    truncated or carries the wrong signature.  Raises BadZipFile for
    archives that span multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    raw = fpin.read(sizeEndCentDir64Locator)
    if len(raw) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, raw)
    if magic != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks == 1):
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    raw = fpin.read(sizeEndCentDir64)
    if len(raw) != sizeEndCentDir64:
        return endrec
    (magic, _recsize, _create_version, _read_version, disk_num, disk_dir,
     entries_here, entries_total, dir_size, dir_offset) = \
        struct.unpack(structEndArchive64, raw)
    if magic != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    replacements = (
        (_ECD_SIGNATURE, magic),
        (_ECD_DISK_NUMBER, disk_num),
        (_ECD_DISK_START, disk_dir),
        (_ECD_ENTRIES_THIS_DISK, entries_here),
        (_ECD_ENTRIES_TOTAL, entries_total),
        (_ECD_SIZE, dir_size),
        (_ECD_OFFSET, dir_offset),
    )
    for position, value in replacements:
        endrec[position] = value
    return endrec
229
230
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.
    The list is passed through _EndRecData64() before being returned, so
    ZIP64 values replace the classic ones when a ZIP64 record is present."""

    # Determine file size (seek to the end, whence=2)
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: a ZIP file with no archive comment has the "end of central
    # directory" structure as the very last item in the file.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    no_comment_record = (
        len(tail) == sizeEndCentDir
        and tail[0:4] == stringEndArchive
        and tail[-2:] == b"\000\000"
    )
    if no_comment_record:
        # the signature is correct and there's no comment: unpack the
        # structure, then append a blank comment and the record start offset
        endrec = list(struct.unpack(structEndArchive, tail))
        endrec.extend((b"", filesize - sizeEndCentDir))

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature.  The comment is the last item in the ZIP file and may
    # be up to 64K long.  It is assumed that the "end of central directory"
    # magic number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    tail = fpin.read()
    start = tail.rfind(stringEndArchive)
    if start < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    record_end = start + sizeEndCentDir
    recData = tail[start:record_end]
    if len(recData) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, recData))
    commentSize = endrec[_ECD_COMMENT_SIZE]     # as claimed by the zip file
    record_pos = maxCommentStart + start
    endrec.append(tail[record_end:record_end + commentSize])
    endrec.append(record_pos)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, record_pos - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000290
Fred Drake484d7352000-10-02 21:14:52 +0000291
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the record for one archive member.

        filename  -- member name; it is cut at the first NUL character and
                     os.sep is replaced by "/" to obtain ``self.filename``
                     (the untouched value is kept in ``self.orig_filename``).
        date_time -- sequence (year, month, day, hour, min, sec).

        Raises ValueError when the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Return a compact description; fields at their default are omitted."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The upper 16 bits of external_attr are rendered as a file mode,
        # the lower 16 bits as a raw hexadecimal value.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.filename[-1:] == '/'
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file (local) header as a bytes object.

        zip64 -- None (default): add the ZIP64 extra record only when
                 file_size or compress_size exceeds ZIP64_LIMIT;
                 true: always add it;
                 false: never add it, and raise LargeZipFile if either size
                 exceeds ZIP64_LIMIT.

        Side effect: extract_version and create_version are raised to the
        minimum version required by ZIP64 and by the compression method.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # Extra record: header id 1, payload length, then both sizes
            # as 64-bit values.
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4,
                                        file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        The name is encoded as ASCII when possible; otherwise as UTF-8 with
        bit 0x800 added to the returned flags (self.flag_bits is not changed).
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply ZIP64 values found in self.extra to this object.

        The extra data is walked as a sequence of (type, length, payload)
        records.  For a record of type 1, the 64-bit values it carries
        replace, in order, file_size, compress_size and header_offset, but
        only for those attributes that currently hold the 0xffffffff
        sentinel (file_size also accepts 0xffffffffffffffff).

        Raises RuntimeError when the record length is not one this method
        understands, or when the record holds fewer values than there are
        sentinel attributes to fill (previously a bare IndexError escaped
        after the object had been partially updated).
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                # ZIP64 extension (large files and/or large archives):
                # work out which attributes are waiting for a 64-bit value.
                pending = []
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    pending.append('file_size')
                if self.compress_size == 0xFFFFFFFF:
                    pending.append('compress_size')
                if self.header_offset == 0xffffffff:
                    pending.append('header_offset')

                # Validate before touching anything so a corrupt record
                # cannot leave the object half-updated.
                if len(pending) > len(counts):
                    raise RuntimeError(
                        "Corrupt ZIP64 extra field: %d value(s) present, "
                        "%d required" % (len(counts), len(pending)))
                for attr, value in zip(pending, counts):
                    setattr(self, attr, value)

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000468
469
class _ZipDecrypter:
    """Decrypter for members stored with ZIP's password-based encryption.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for byte in range(256):
            crc = byte
            for _ in range(8):
                shifted = (crc >> 1) & 0x7FFFFFFF
                crc = shifted ^ poly if crc & 1 else shifted
            table.append(crc)
        return table
    # Built on first instantiation and then shared by every instance.
    crctable = None

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ch) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        """Set the three keys to their start values and mix in each item of pwd."""
        if _ZipDecrypter.crctable is None:
            _ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable()
        self.key0, self.key1, self.key2 = 0x12345678, 0x23456789, 0x34567890
        for item in pwd:
            self._UpdateKeys(item)

    def _UpdateKeys(self, c):
        """Advance key0, key1 and key2 using the integer c."""
        key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (key0 & 0xFF)) & 0xFFFFFFFF
        key1 = (key1 * 0x08088405 + 1) & 0xFFFFFFFF
        self.key0 = key0
        self.key1 = key1
        self.key2 = self._crc32((key1 >> 24) & 0xFF, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        plain = c ^ (((k * (k ^ 1)) >> 8) & 255)
        # The keys are advanced with the decrypted value, not the input.
        self._UpdateKeys(plain)
        return plain
529
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200530
class LZMACompressor:
    """Raw LZMA1 compressor whose first output is prefixed with a small header.

    The header is packed as '<BBH' (the values 9, 4 and the length of the
    encoded filter properties) followed by the properties themselves.  The
    underlying lzma compressor is created lazily on the first compress()
    or flush() call.
    """

    def __init__(self):
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        lzma1_filter = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[lzma1_filter])
        header = struct.pack('<BBH', 9, 4, len(props))
        return header + props

    def compress(self, data):
        """Compress data; the header is prepended on the very first call."""
        prefix = self._init() if self._comp is None else b''
        return prefix + self._comp.compress(data)

    def flush(self):
        """Finish the stream; emits the header first if nothing was written yet."""
        prefix = self._init() if self._comp is None else b''
        return prefix + self._comp.flush()
552
553
class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by a small properties header.

    Input is buffered until more than 4 + psize bytes are available, where
    psize is the 16-bit little-endian value at bytes 2-3 of the header; the
    psize bytes after the header are the filter properties used to build
    the underlying lzma decompressor.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Return decompressed bytes for data (b'' while the header is incomplete)."""
        if self._decomp is None:
            buffered = self._unconsumed + data
            self._unconsumed = buffered
            if len(buffered) <= 4:
                return b''
            (psize,) = struct.unpack('<H', buffered[2:4])
            body_start = 4 + psize
            if len(buffered) <= body_start:
                return b''

            lzma1_filter = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, buffered[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[lzma1_filter])
            # Everything after the header is payload; the staging buffer is
            # no longer needed once the real decompressor exists.
            data = buffered[body_start:]
            del self._unconsumed

        output = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return output
580
581
# Display names for ZIP compression method numbers, including methods this
# module only recognises by name.
compressor_names = {
    0: 'store', 1: 'shrink',
    2: 'reduce', 3: 'reduce', 4: 'reduce', 5: 'reduce',
    6: 'implode', 7: 'tokenize',
    8: 'deflate', 9: 'deflate64', 10: 'implode',
    12: 'bzip2', 14: 'lzma',
    18: 'terse', 19: 'lz77',
    97: 'wavpack', 98: 'ppmd',
}
601
def _check_compression(compression):
    """Raise RuntimeError unless *compression* can be used in this process.

    ZIP_STORED is always accepted.  ZIP_DEFLATED, ZIP_BZIP2 and ZIP_LZMA are
    accepted only when the corresponding module (zlib, bz2, lzma) was
    imported successfully; every other value is rejected.
    """
    if compression == ZIP_STORED:
        return

    backends = (
        (ZIP_DEFLATED, zlib, "Compression requires the (missing) zlib module"),
        (ZIP_BZIP2, bz2, "Compression requires the (missing) bz2 module"),
        (ZIP_LZMA, lzma, "Compression requires the (missing) lzma module"),
    )
    for method, module, message in backends:
        if compression == method:
            if not module:
                raise RuntimeError(message)
            return

    raise RuntimeError("That compression method is not supported")
619
620
def _get_compressor(compress_type):
    """Return a new compressor object for *compress_type*, or None.

    None is returned for every type other than ZIP_DEFLATED, ZIP_BZIP2 and
    ZIP_LZMA.
    """
    if compress_type == ZIP_DEFLATED:
        # Negative wbits: raw deflate stream without zlib header/trailer.
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    if compress_type == ZIP_LZMA:
        return LZMACompressor()
    return None
631
632
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    ZIP_STORED yields None.  Any type other than ZIP_STORED, ZIP_DEFLATED,
    ZIP_BZIP2 and ZIP_LZMA raises NotImplementedError; the message includes
    the method's name when compressor_names knows it.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # Negative wbits: raw deflate stream without zlib header/trailer.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    descr = compressor_names.get(compress_type)
    if not descr:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200648
649
class _SharedFile:
    """Reader over a file object that keeps its own read position.

    Each read() seeks the underlying file to this object's saved position
    while holding *lock*, so several readers can take turns on one file.
    """

    def __init__(self, file, pos, close, lock):
        self._file = file
        self._pos = pos
        self._close = close     # callable invoked with the file on close()
        self._lock = lock

    def read(self, n=-1):
        """Read up to n bytes from the saved position and remember the new one."""
        with self._lock:
            source = self._file
            source.seek(self._pos)
            chunk = source.read(n)
            self._pos = source.tell()
            return chunk

    def close(self):
        """Hand the file to the close callback; later calls do nothing."""
        if self._file is None:
            return
        fileobj, self._file = self._file, None
        self._close(fileobj)
669
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200670# Provide the tell method for unseekable stream
class _Tellable:
    """Write-side wrapper that supplies tell() by counting written data.

    The offset starts at 0 and grows by whatever the wrapped object's
    write() returns.
    """

    def __init__(self, fp):
        self.fp = fp
        self.offset = 0

    def write(self, data):
        """Write data to the wrapped object and return its write() result."""
        written = self.fp.write(data)
        self.offset = self.offset + written
        return written

    def tell(self):
        """Return the running total of the values returned by write()."""
        return self.offset

    def flush(self):
        """Flush the wrapped object."""
        self.fp.flush()

    def close(self):
        """Close the wrapped object."""
        self.fp.close()
689
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200690
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.

    Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses are required:
    # "1 << 31 - 1" parses as "1 << (31 - 1)", i.e. 2**30.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Set up a reader for one archive member.

        fileobj       -- object whose read() yields the member's raw bytes
        mode          -- open mode; a 'U' in it enables universal newlines
        zipinfo       -- object providing compress_type, compress_size,
                         file_size, filename and optionally CRC
        decrypter     -- optional callable applied to every raw byte
        close_fileobj -- if true, close() also closes fileobj
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        # Raw (compressed) bytes not yet read from fileobj.
        self._compress_left = zipinfo.compress_size
        # Uncompressed bytes not yet handed out by _read1().
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            # No reference value: _update_crc() becomes a no-op.
            self._expected_crc = None

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Push the bytes just read back in front of the buffer so the
            # logical position is unchanged.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        # Drain the buffer, then keep reading until n more bytes arrive
        # or the member is exhausted.
        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than asked for: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n=-1):
        """Read up to n bytes with at most one read() system call.

        If n is omitted, None, or negative, the buffered bytes are returned
        together with the first non-empty chunk obtained from the stream.
        """

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            # Loop only to skip over empty chunks; stop at the first
            # chunk that carries data.
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the recorded uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read raw bytes from the underlying file (at least MIN_READ_SIZE,
        # at most what remains of the member) and decrypt them if needed.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            # The file ended before compress_size bytes were available.
            raise EOFError

        if self._decrypter is not None:
            data = bytes(map(self._decrypter, data))
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000956
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000957
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000958class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000959 """ Class with methods to open, read, write, close, list zip files.
960
Serhiy Storchaka235c5e02013-11-23 15:55:38 +0200961 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +0000962
Fred Drake3d9091e2001-03-26 15:49:24 +0000963 file: Either the path to the file, or a file-like object.
964 If it is a path, the file will be opened and closed by ZipFile.
965 mode: The mode can be either read "r", write "w" or append "a".
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200966 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
967 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000968 allowZip64: if True ZipFile will create files with ZIP64 extensions when
969 needed, otherwise it will raise an exception when this would
970 be necessary.
971
Fred Drake3d9091e2001-03-26 15:49:24 +0000972 """
Fred Drake484d7352000-10-02 21:14:52 +0000973
Fred Drake90eac282001-02-28 05:29:34 +0000974 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -0800975 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +0000976
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
    """Open the ZIP file with mode read "r", write "w" or append "a"."""
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                  # Level of printing: 0 through 3
    self.NameToInfo = {}            # Find file info given name
    self.filelist = []              # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = mode
    self.pwd = None
    self._comment = b''

    # Check if we were passed a file-like object
    if not isinstance(file, str):
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    else:
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        # Maps the archive mode to the first file mode to try, and each
        # file mode to the one to try next when opening fails.
        fallback_modes = {'r' : 'rb', 'w': 'w+b', 'a' : 'r+b',
                          'r+b': 'w+b', 'w+b': 'wb'}
        filemode = fallback_modes[mode]
        while True:
            try:
                self.fp = io.open(file, filemode)
                break
            except OSError:
                if filemode not in fallback_modes:
                    raise
                filemode = fallback_modes[filemode]
    self._fileRefCnt = 1
    self._lock = threading.RLock()
    self._seekable = True

    try:
        if mode == 'r':
            self._RealGetContents()
        elif mode == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
            try:
                self.start_dir = self.fp.tell()
            except (AttributeError, OSError):
                # No usable tell(): count written bytes ourselves.
                self.fp = _Tellable(self.fp)
                self.start_dir = 0
                self._seekable = False
            else:
                # Some file-like objects can provide tell() but not seek()
                try:
                    self.fp.seek(self.start_dir)
                except (AttributeError, OSError):
                    self._seekable = False
        elif mode == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                self.start_dir = self.fp.tell()
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')
    except BaseException:
        # Release the file on any failure, then let the error propagate.
        opened = self.fp
        self.fp = None
        self._fpclose(opened)
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001059
def __enter__(self):
    """Enter a ``with`` block; the object itself is the bound value."""
    return self
1062
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by calling close(); exceptions propagate."""
    self.close()
1065
def __repr__(self):
    """Return '<module.Class ...>' describing the file and mode, or
    '<module.Class [closed]>' once fp is None."""
    cls = self.__class__
    pieces = ['<%s.%s' % (cls.__module__, cls.__qualname__)]
    if self.fp is None:
        pieces.append(' [closed]')
    else:
        if self._filePassed:
            # Caller supplied a file object: show it rather than a name.
            pieces.append(' file=%r' % self.fp)
        elif self.filename is not None:
            pieces.append(' filename=%r' % self.filename)
        pieces.append(' mode=%r' % self.mode)
    pieces.append('>')
    return ''.join(pieces)
1079
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file."""
    archive = self.fp
    try:
        endrec = _EndRecData(archive)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    archive.seek(self.start_dir, 0)
    # Parse the directory from an in-memory copy.
    directory = io.BytesIO(archive.read(size_cd))
    total = 0
    while total < size_cd:
        raw_entry = directory.read(sizeCentralDir)
        if len(raw_entry) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, raw_entry)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        name_len = centdir[_CD_FILENAME_LENGTH]
        extra_len = centdir[_CD_EXTRA_FIELD_LENGTH]
        comment_len = centdir[_CD_COMMENT_LENGTH]

        raw_name = directory.read(name_len)
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = raw_name.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = raw_name.decode('cp437')
        # Create ZipInfo instance to store file information
        info = ZipInfo(filename)
        info.extra = directory.read(extra_len)
        info.comment = directory.read(comment_len)
        info.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (info.create_version, info.create_system, info.extract_version,
         info.reserved, info.flag_bits, info.compress_type, dos_time,
         dos_date, info.CRC, info.compress_size,
         info.file_size) = centdir[1:12]
        if info.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (info.extract_version / 10))
        info.volume, info.internal_attr, info.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        info._raw_time = dos_time
        info.date_time = ((dos_date >> 9) + 1980,
                          (dos_date >> 5) & 0xF,
                          dos_date & 0x1F,
                          dos_time >> 11,
                          (dos_time >> 5) & 0x3F,
                          (dos_time & 0x1F) * 2)

        info._decodeExtra()
        info.header_offset = info.header_offset + concat
        self.filelist.append(info)
        self.NameToInfo[info.filename] = info

        # update total bytes read from central directory
        total += sizeCentralDir + name_len + extra_len + comment_len

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001156
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001157
def namelist(self):
    """Return a list of file names in the archive."""
    names = []
    for entry in self.filelist:
        names.append(entry.filename)
    return names
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001161
def infolist(self):
    """Return the list of ZipInfo instances, one per archive member.

    The list returned is self.filelist itself, not a copy.
    """
    return self.filelist
1166
def printdir(self, file=None):
    """Print a table of contents for the zip file."""
    header = ("File Name", "Modified ", "Size")
    print("%-46s %19s %12s" % header, file=file)
    for zinfo in self.filelist:
        # Only the first six fields (year .. second) are formatted.
        stamp = zinfo.date_time[:6]
        date = "%d-%02d-%02d %02d:%02d:%02d" % stamp
        row = (zinfo.filename, date, zinfo.file_size)
        print("%-46s %s %12d" % row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001175
1176 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001177 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001178 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001179 for zinfo in self.filelist:
1180 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001181 # Read by chunks, to avoid an OverflowError or a
1182 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001183 with self.open(zinfo.filename, "r") as f:
1184 while f.read(chunk_size): # Check CRC-32
1185 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001186 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001187 return zinfo.filename
1188
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when NameToInfo has no entry for 'name'.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001197
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A false value (None, b'') clears the default password.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001206
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Only bytes are accepted; anything else is rejected before any
    # state is touched.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment))
    # check for valid comment length
    too_long = len(comment) > ZIP_MAX_COMMENT
    if too_long:
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT, stacklevel=2)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    # Mark the archive as modified.
    self._didModify = True
1223 self._didModify = True
1224
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name."""
    with self.open(name, "r", pwd) as member:
        payload = member.read()
    return payload
Guido van Rossumd8faa362007-04-27 19:54:29 +00001229
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'."""
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if 'U' in mode:
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Make sure we have an info object: 'name' is either one already
    # or a member name to look up.
    zinfo = name if isinstance(name, ZipInfo) else self.getinfo(name)

    self._fileRefCnt += 1
    zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose, self._lock)
    try:
        # Skip the file header:
        raw_header = zef_file.read(sizeFileHeader)
        if len(raw_header) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, raw_header)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        extra_length = fheader[_FH_EXTRA_FIELD_LENGTH]
        if extra_length:
            zef_file.read(extra_length)

        flag_bits = zinfo.flag_bits
        if flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        # Flag bit 0x800 marks a UTF-8 filename.
        encoding = "utf-8" if flag_bits & 0x800 else "cp437"
        fname_str = fname.decode(encoding)

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        zd = None
        if flag_bits & 0x1:
            pwd = pwd or self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, password "
                                   "required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            decrypted = [zd(byte) for byte in header[0:12]]
            if flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if decrypted[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd, True)
    except BaseException:
        # Give back the shared-file reference on any failure.
        zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001317
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a ZipInfo object. You can
       specify a different directory using `path'.
    """
    # Resolve a member name to its ZipInfo object.
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    # Without an explicit destination, use the current working directory.
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
1331
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
       directory. `path' specifies a different directory to extract to.
       `members' is optional and must be a subset of the list returned
       by namelist().
    """
    # Default to every member of the archive.
    targets = self.namelist() if members is None else members
    for target in targets:
        self.extract(target, path, pwd)
1343
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001344 @classmethod
1345 def _sanitize_windows_name(cls, arcname, pathsep):
1346 """Replace bad characters and remove trailing dots from parts."""
1347 table = cls._windows_illegal_name_trans_table
1348 if not table:
1349 illegal = ':<>|"?*'
1350 table = str.maketrans(illegal, '_' * len(illegal))
1351 cls._windows_illegal_name_trans_table = table
1352 arcname = arcname.translate(table)
1353 # remove trailing dots
1354 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1355 # rejoin, removing empty parts.
1356 arcname = pathsep.join(x for x in arcname if x)
1357 return arcname
1358
Christian Heimes790c8232008-01-07 21:14:23 +00001359 def _extract_member(self, member, targetpath, pwd):
1360 """Extract the ZipInfo object 'member' to a physical
1361 file on the path targetpath.
1362 """
1363 # build the destination pathname, replacing
1364 # forward slashes to platform specific separators.
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001365 arcname = member.filename.replace('/', os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001366
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001367 if os.path.altsep:
1368 arcname = arcname.replace(os.path.altsep, os.path.sep)
1369 # interpret absolute pathname as relative, remove drive letter or
1370 # UNC path, redundant separators, "." and ".." components.
1371 arcname = os.path.splitdrive(arcname)[1]
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001372 invalid_path_parts = ('', os.path.curdir, os.path.pardir)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001373 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001374 if x not in invalid_path_parts)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001375 if os.path.sep == '\\':
Serhiy Storchakae5e64442013-02-02 19:50:59 +02001376 # filter illegal characters on Windows
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001377 arcname = self._sanitize_windows_name(arcname, os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001378
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001379 targetpath = os.path.join(targetpath, arcname)
Christian Heimes790c8232008-01-07 21:14:23 +00001380 targetpath = os.path.normpath(targetpath)
1381
1382 # Create all upper directories if necessary.
1383 upperdirs = os.path.dirname(targetpath)
1384 if upperdirs and not os.path.exists(upperdirs):
1385 os.makedirs(upperdirs)
1386
Martin v. Löwis59e47792009-01-24 14:10:07 +00001387 if member.filename[-1] == '/':
Martin v. Löwis70ccd162009-05-24 19:47:22 +00001388 if not os.path.isdir(targetpath):
1389 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001390 return targetpath
1391
Antoine Pitrou17babc52012-11-17 23:50:08 +01001392 with self.open(member, pwd=pwd) as source, \
1393 open(targetpath, "wb") as target:
1394 shutil.copyfileobj(source, target)
Christian Heimes790c8232008-01-07 21:14:23 +00001395
1396 return targetpath
1397
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001398 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001399 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001400 if zinfo.filename in self.NameToInfo:
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001401 import warnings
1402 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001403 if self.mode not in ("w", "a"):
Collin Winterce36ad82007-08-30 01:19:48 +00001404 raise RuntimeError('write() requires mode "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001405 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001406 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02001407 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001408 _check_compression(zinfo.compress_type)
Serhiy Storchakacfbb3942014-09-23 21:34:24 +03001409 if not self._allowZip64:
1410 requires_zip64 = None
1411 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1412 requires_zip64 = "Files count"
1413 elif zinfo.file_size > ZIP64_LIMIT:
1414 requires_zip64 = "Filesize"
1415 elif zinfo.header_offset > ZIP64_LIMIT:
1416 requires_zip64 = "Zipfile size"
1417 if requires_zip64:
1418 raise LargeZipFile(requires_zip64 +
1419 " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001420
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    filename is stat()ed for its mode, size and modification time; a
    directory is stored as a zero-length entry whose name ends in '/'.
    arcname defaults to filename and is stored normalised, without a
    drive letter and without leading separators.  compress_type
    overrides self.compression for this entry when it is not None.

    Raises RuntimeError if the archive has already been closed, or, on
    a seekable archive where a non-ZIP64 header was written, if the
    data read or the compressed output turns out larger than
    ZIP64_LIMIT.  Anything raised by self._writecheck() propagates.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    # Strip leading separators so the stored name is relative.
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    # Provisional size from stat(); replaced below by the byte count
    # actually read.
    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    with self._lock:
        if self._seekable:
            # New members go where the central directory currently starts.
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            # Directory entry: header only, no data.
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            self.fp.write(zinfo.FileHeader(False))
            self.start_dir = self.fp.tell()
            return

        cmpr = _get_compressor(zinfo.compress_type)
        if not self._seekable:
            # The header cannot be rewritten later, so set flag bit 0x08;
            # CRC and sizes are then appended after the data (see below).
            zinfo.flag_bits |= 0x08
        with open(filename, "rb") as fp:
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            # Compressed size can be larger than uncompressed size
            zip64 = self._allowZip64 and \
                    zinfo.file_size * 1.05 > ZIP64_LIMIT
            self.fp.write(zinfo.FileHeader(zip64))
            file_size = 0
            # Copy in 8 KiB chunks, updating the CRC on the raw bytes and
            # counting the bytes before and after compression.
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = crc32(buf, CRC) & 0xffffffff
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        if cmpr:
            # Emit whatever the compressor is still buffering.
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            fmt = '<LQQ' if zip64 else '<LLL'
            self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
                                      zinfo.file_size))
            self.start_dir = self.fp.tell()
        else:
            if not zip64 and self._allowZip64:
                # A non-ZIP64 header was written based on the stat() size;
                # the real sizes must still fit in it.
                if file_size > ZIP64_LIMIT:
                    raise RuntimeError('File size has increased during compressing')
                if compress_size > ZIP64_LIMIT:
                    raise RuntimeError('Compressed size larger than uncompressed size')
            # Seek backwards and write file header (which will now include
            # correct CRC and file sizes)
            self.start_dir = self.fp.tell()       # Preserve current position in file
            self.fp.seek(zinfo.header_offset)
            self.fp.write(zinfo.FileHeader(zip64))
            self.fp.seek(self.start_dir)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001522
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo is created with the current
    local time, self.compression, and either directory attributes (name
    ends in '/') or 0o600 file permissions.  'compress_type', when not
    None, overrides the compression type of the entry.

    Raises RuntimeError if the archive has already been closed and
    LargeZipFile if the entry needs ZIP64 while it is disallowed.
    Anything raised by self._writecheck() propagates.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        if zinfo.filename[-1] == '/':
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)            # Uncompressed size
    with self._lock:
        if self._seekable:
            # New members go where the central directory currently starts.
            self.fp.seek(self.start_dir)
        # Record the header position and apply the per-call compression
        # override exactly once (the original repeated this pair).
        zinfo.header_offset = self.fp.tell()    # Start of header data
        if compress_type is not None:
            zinfo.compress_type = compress_type
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True
        zinfo.CRC = crc32(data) & 0xffffffff       # CRC-32 checksum
        co = _get_compressor(zinfo.compress_type)
        if co:
            data = co.compress(data) + co.flush()
            zinfo.compress_size = len(data)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        zip64 = zinfo.file_size > ZIP64_LIMIT or \
            zinfo.compress_size > ZIP64_LIMIT
        if zip64 and not self._allowZip64:
            raise LargeZipFile("Filesize would require ZIP64 extensions")
        self.fp.write(zinfo.FileHeader(zip64))
        self.fp.write(data)
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            fmt = '<LQQ' if zip64 else '<LLL'
            self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
                                      zinfo.file_size))
        self.fp.flush()
        # The central directory will start right after this member.
        self.start_dir = self.fp.tell()
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001585
1586 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001587 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001588 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001589
def close(self):
    """Finish the archive and release the underlying file.

    Does nothing when the archive is already closed.  For mode "w" or
    "a" with pending modifications, the ending records are written
    first; the file reference is released even if that fails.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:
            # write ending records
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        # Detach the file object before handing it to _fpclose().
        handle, self.fp = self.fp, None
        self._fpclose(handle)
1606
def _write_end_record(self):
    """Write the central directory and the end-of-archive records.

    One central-directory entry is written to self.fp for every item
    in self.filelist, followed by the ZIP64 end records when a limit is
    exceeded, the regular end-of-archive record and the archive
    comment.  Raises LargeZipFile if ZIP64 records are required but
    self._allowZip64 is false.
    """
    for zinfo in self.filelist:         # write central directory
        dt = zinfo.date_time
        # Pack the date/time tuple into the two bit-field integers
        # stored in the entry (seconds are halved).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        # Values too large for the fixed-width fields are collected in
        # `extra' and replaced by 0xffffffff in the entry itself.
        extra = []
        if zinfo.file_size > ZIP64_LIMIT \
           or zinfo.compress_size > ZIP64_LIMIT:
            extra.append(zinfo.file_size)
            extra.append(zinfo.compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
        else:
            file_size = zinfo.file_size
            compress_size = zinfo.compress_size

        if zinfo.header_offset > ZIP64_LIMIT:
            extra.append(zinfo.header_offset)
            header_offset = 0xffffffff
        else:
            header_offset = zinfo.header_offset

        extra_data = zinfo.extra
        min_version = 0
        if extra:
            # Append a ZIP64 field to the extra's
            extra_data = struct.pack(
                '<HH' + 'Q'*len(extra),
                1, 8*len(extra), *extra) + extra_data

            min_version = ZIP64_VERSION

        # BZIP2 and LZMA members raise the minimum version as well.
        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
        except DeprecationWarning:
            # Dump the values that were being packed to stderr, then
            # re-raise.
            print((structCentralDir, stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset), file=sys.stderr)
            raise
        self.fp.write(centdir)
        self.fp.write(filename)
        self.fp.write(extra_data)
        self.fp.write(zinfo.comment)

    # Position just past the last central-directory entry.
    pos2 = self.fp.tell()
    # Write end-of-zip-archive record
    centDirCount = len(self.filelist)
    centDirSize = pos2 - self.start_dir
    centDirOffset = self.start_dir
    requires_zip64 = None
    if centDirCount > ZIP_FILECOUNT_LIMIT:
        requires_zip64 = "Files count"
    elif centDirOffset > ZIP64_LIMIT:
        requires_zip64 = "Central directory offset"
    elif centDirSize > ZIP64_LIMIT:
        requires_zip64 = "Central directory size"
    if requires_zip64:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
        # NOTE(review): the literals 44, 45, 45, 0, 0 presumably fill the
        # record-size, version and disk-number fields of
        # structEndArchive64 -- confirm against its definition.
        zip64endrec = struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, centDirCount, centDirCount,
            centDirSize, centDirOffset)
        self.fp.write(zip64endrec)

        zip64locrec = struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, pos2, 1)
        self.fp.write(zip64locrec)
        # Clamp the values written to the regular end record below.
        centDirCount = min(centDirCount, 0xFFFF)
        centDirSize = min(centDirSize, 0xFFFFFFFF)
        centDirOffset = min(centDirOffset, 0xFFFFFFFF)

    endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, centDirCount, centDirCount,
                         centDirSize, centDirOffset, len(self._comment))
    self.fp.write(endrec)
    self.fp.write(self._comment)
    self.fp.flush()
1707
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001708 def _fpclose(self, fp):
1709 assert self._fileRefCnt > 0
1710 self._fileRefCnt -= 1
1711 if not self._fileRefCnt and not self._filePassed:
1712 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001713
1714
class PyZipFile(ZipFile):
    """ZipFile subclass for archiving Python modules and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember `optimize'.

        `optimize' selects the bytecode flavour used by writepy(); -1
        means "use whatever compiled file is already present".
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add the Python code found at "pathname" to the ZIP archive.

        A package directory (one containing __init__.py) is added
        recursively: its *.py files and its sub-packages.  A plain
        directory contributes only the *.py files directly inside it.
        Any other pathname must name a *.py file.  Each module is
        stored under the name chosen by _get_codename(), which compiles
        it when needed.

        If filterfunc is given it is called with each candidate path;
        a false result skips that file or directory.
        """
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s "%s" skipped by filterfunc' % (label, pathname))
            return

        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for entry in os.listdir(pathname):
                entry_path = os.path.join(pathname, entry)
                if os.path.splitext(entry)[1] != ".py":
                    continue
                if filterfunc and not filterfunc(entry_path):
                    if self.debug:
                        print('file "%s" skipped by filterfunc' % entry_path)
                    continue
                fname, arcname = self._get_codename(entry_path[0:-3],
                                                    basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
            return

        # This is a package directory, add it
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)
        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for entry in entries:
            entry_path = os.path.join(pathname, entry)
            if os.path.isdir(entry_path):
                if os.path.isfile(os.path.join(entry_path, "__init__.py")):
                    # Nested package: recurse.
                    self.writepy(entry_path, basename,
                                 filterfunc=filterfunc)
                continue
            if os.path.splitext(entry)[1] != ".py":
                continue
            if filterfunc and not filterfunc(entry_path):
                if self.debug:
                    print('file "%s" skipped by filterfunc' % entry_path)
                continue
            fname, arcname = self._get_codename(entry_path[0:-3],
                                                basename)
            if self.debug:
                print("Adding", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the module at `pathname'.

        `pathname' is the module path without its ".py" suffix.  The
        result names the file to read from disk and the name to store
        it under (prefixed with `basename' when that is non-empty).
        The source is compiled when no sufficiently recent compiled
        file exists; if compilation fails the .py file itself is used.
        """
        def _compile(file, optimize=-1):
            # Byte-compile `file'; report failure instead of raising.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = importlib.util.cache_from_source(file_py, True)
        pycache_pyo = importlib.util.cache_from_source(file_py, False)

        def _is_current(candidate):
            # True when `candidate' exists and is at least as new as the
            # source file.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present.  Files from
            # __pycache__ are stored under the legacy .pyc/.pyo name.
            candidates = (
                (file_pyo, file_pyo),
                (file_pyc, file_pyc),
                (pycache_pyc, file_pyc),
                (pycache_pyo, file_pyo),
            )
            for fname, arcname in candidates:
                if _is_current(fname):
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    fname = (pycache_pyc if __debug__ else pycache_pyo)
                    arcname = (file_pyc if __debug__ else file_pyo)
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname, arcname = pycache_pyc, file_pyc
            else:
                fname, arcname = pycache_pyo, file_pyo
            if not _is_current(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py

        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001873
1874
def main(args=None):
    """Command-line entry point: list, test, extract or create an archive.

    `args' is the argument list (defaults to sys.argv[1:]).  On a
    missing/unknown option or a wrong number of arguments the usage
    text is printed and the process exits with status 1.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)

    def usage_exit():
        print(USAGE)
        sys.exit(1)

    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_exit()

    command = args[0]
    argc = len(args)

    if command == '-l':
        if argc != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif command == '-t':
        if argc != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif command == '-e':
        if argc != 3:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif command == '-c':
        if argc < 3:
            usage_exit()

        def addToZip(zf, path, zippath):
            # Files are deflated; directories are added (when they have
            # a name) and then walked recursively.  Anything else is
            # ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                if zippath:
                    zf.write(path, zippath)
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm),
                             os.path.join(zippath, nm))

        with ZipFile(args[1], 'w') as zf:
            for path in args[2:]:
                zippath = os.path.basename(path)
                if not zippath:
                    # Path ended with a separator: use the last component.
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                addToZip(zf, path, zippath)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001940
# Run the command-line interface only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()