blob: 03dead53171c552df13a2201bcb3451fd8c62621 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Brett Cannonb57a0852013-06-15 17:32:30 -04009import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +000010import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Serhiy Storchaka9e777732015-10-10 19:43:32 +030017try:
18 import threading
19except ImportError:
20 import dummy_threading as threading
Guido van Rossum32abe6f2000-03-31 17:30:02 +000021
22try:
Tim Peterse1190062001-01-15 03:34:38 +000023 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000024 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040025except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000026 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000027 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000028
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020029try:
30 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040031except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020032 bz2 = None
33
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020034try:
35 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040036except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020037 lzma = None
38
# Names exported by a star-import of this module.
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000042
class BadZipFile(Exception):
    """Error raised for ZIP archives this module cannot handle.

    For example, it is raised for archives that span multiple disks.
    """
Thomas Wouters0e3f5912006-08-11 14:57:12 +000045
46
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """
52
error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names
54
Guido van Rossum32abe6f2000-03-31 17:30:02 +000055
# Sizes above ZIP64_LIMIT make ZipInfo.FileHeader() switch to (or demand)
# the ZIP64 extensions.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "Version needed to extract" values written into headers, depending on
# which features (ZIP64, bzip2, LZMA) a member uses.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020073
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
166
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    *fp* is a readable, seekable binary file object.  An OSError raised
    while probing the file is swallowed and reported as False.
    """
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except OSError:
        pass
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000174
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too: anything
    with a ``read`` attribute is probed directly, anything else is opened
    in binary mode.  Returns False (instead of raising) when the file
    cannot be opened or read.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except OSError:
        # Missing or unreadable file: by definition not a ZIP file.
        pass
    return result
190
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    *offset* is the position of the classic end-of-central-directory record
    relative to the end of the file (so it is negative).  *endrec* is the
    list built by _EndRecData(); it is updated in place and returned.  If
    no ZIP64 locator/record is found, *endrec* is returned unchanged.

    Raises BadZipFile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
232
233
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the eight fields of the ZIP "End of central dir"
    record, followed by the archive comment (bytes) and the file seek
    offset of this record.  The list is passed through _EndRecData64() so
    ZIP64 values override the classic ones when present."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
        data[0:4] == stringEndArchive and
        data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec = list(endrec)

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000293
Fred Drake484d7352000-10-02 21:14:52 +0000294
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create a member description with default header values.

        filename is the member name; it is cut at the first null character
        and os.sep is converted to "/".  date_time is a sequence of
        (year, month, day, hour, min, sec).

        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The high 16 bits of external_attr hold the Unix mode (see
        # from_file); the low 16 bits are shown raw.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file (local) header as bytes.

        zip64 selects whether a ZIP64 extra record is appended: True/False
        force the choice, None decides from the member sizes.  As a side
        effect, extract_version and create_version are raised to the
        minimum required by the features in use.

        Raises LargeZipFile if the sizes exceed ZIP64_LIMIT while zip64 is
        false.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        ASCII is used when possible; otherwise the name is encoded as UTF-8
        and bit 0x800 is added to the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply ZIP64 values found in the extra field to this object.

        Walks the (type, length, payload) records of self.extra.  For the
        record of type 1, each of file_size, compress_size and header_offset
        that currently holds an "all ones" placeholder is replaced by the
        next 64-bit value of the record.

        Raises RuntimeError if the type-1 record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            # Skip this record (4-byte header plus payload).
            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if the member name ends with '/' (a directory entry).

        An empty filename yields False instead of raising IndexError.
        """
        return self.filename.endswith('/')
506
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000507
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    @staticmethod
    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = [0] * 256
        for i in range(256):
            crc = i
            for j in range(8):
                if crc & 1:
                    crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
                else:
                    crc = ((crc >> 1) & 0x7FFFFFFF)
            table[i] = crc
        return table
    # Built on first instantiation and shared by all instances.
    crctable = None

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]

    def __init__(self, pwd):
        """Initialise the three keys and mix in each byte of *pwd*.

        *pwd* is iterated and every item is used as an integer, so it is
        expected to be a bytes-like object.
        """
        if _ZipDecrypter.crctable is None:
            _ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable()
        self.key0 = 305419896
        self.key1 = 591751049
        self.key2 = 878082192
        for p in pwd:
            self._UpdateKeys(p)

    def _UpdateKeys(self, c):
        """Advance the three keys using the byte value *c*."""
        self.key0 = self._crc32(c, self.key0)
        self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
        self.key1 = (self.key1 * 134775813 + 1) & 4294967295
        self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        c = c ^ (((k * (k^1)) >> 8) & 255)
        # The keys are updated with the decrypted byte.
        self._UpdateKeys(c)
        return c
567
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200568
class LZMACompressor:
    """Raw LZMA1 compressor that prefixes its output with a small header.

    The first call to compress() or flush() emits a header made of the
    bytes 9 and 4, the little-endian 16-bit length of the encoded filter
    properties, and the properties themselves; the raw LZMA stream follows.
    """

    def __init__(self):
        # Created lazily so the header is emitted together with the first
        # chunk of output.
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
            lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        ])
        # NOTE(review): 9, 4 presumably is the LZMA SDK version required by
        # the ZIP specification -- confirm against APPNOTE.
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*; the first call also returns the header."""
        if self._comp is None:
            return self._init() + self._comp.compress(data)
        return self._comp.compress(data)

    def flush(self):
        """Finish the stream; emits the header too if nothing was written."""
        if self._comp is None:
            return self._init() + self._comp.flush()
        return self._comp.flush()
590
591
class LZMADecompressor:
    """Decompressor for a raw LZMA1 stream preceded by a properties header.

    The header is four bytes (the last two being the little-endian length
    of the filter properties) followed by the properties.  Input is
    buffered until the header and at least one byte of payload are
    available.
    """

    def __init__(self):
        self._decomp = None         # created once the header is complete
        self._unconsumed = b''      # input buffered while waiting for it
        self.eof = False

    def decompress(self, data):
        """Return decompressed bytes for *data* (b'' while still buffering)."""
        if self._decomp is None:
            self._unconsumed += data
            if len(self._unconsumed) <= 4:
                return b''
            psize, = struct.unpack('<H', self._unconsumed[2:4])
            if len(self._unconsumed) <= 4 + psize:
                return b''

            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
                lzma._decode_filter_properties(lzma.FILTER_LZMA1,
                                               self._unconsumed[4:4 + psize])
            ])
            # Everything after the header is payload.
            data = self._unconsumed[4 + psize:]
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
618
619
# Human-readable names for ZIP compression method numbers; used in
# ZipInfo.__repr__ and in the error raised for unsupported methods.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
639
def _check_compression(compression):
    """Validate that *compression* is a usable compression method.

    Raises RuntimeError if the method is unknown, or if the module that
    implements it (zlib, bz2 or lzma) could not be imported.
    """
    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
    else:
        raise RuntimeError("That compression method is not supported")
657
658
def _get_compressor(compress_type):
    """Return a new compressor object for *compress_type*.

    Returns None for any method other than deflate, bzip2 and LZMA
    (including ZIP_STORED, which needs no compressor).
    """
    if compress_type == ZIP_DEFLATED:
        # Negative wbits: raw deflate stream, no zlib header or trailer.
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    elif compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    elif compress_type == ZIP_LZMA:
        return LZMACompressor()
    else:
        return None
669
670
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    Returns None for ZIP_STORED.  Raises NotImplementedError for any
    other method than deflate, bzip2 and LZMA; the message includes the
    method's name when it is listed in compressor_names.
    """
    if compress_type == ZIP_STORED:
        return None
    elif compress_type == ZIP_DEFLATED:
        # Negative wbits: raw deflate stream, no zlib header or trailer.
        return zlib.decompressobj(-15)
    elif compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    elif compress_type == ZIP_LZMA:
        return LZMADecompressor()
    else:
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
        else:
            raise NotImplementedError("compression type %d" % (compress_type,))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200686
687
class _SharedFile:
    """Read-only view of a shared file object with its own position.

    file    -- the underlying (shared) file object
    pos     -- offset at which this view starts reading
    close   -- callable invoked with the file object when the view is closed
    lock    -- context manager held while the shared file is seeked and read
    writing -- callable returning true while a writing handle is open
    """

    def __init__(self, file, pos, close, lock, writing):
        self._file, self._pos = file, pos
        self._close = close
        self._lock = lock
        self._writing = writing

    def read(self, n=-1):
        """Read up to n bytes starting at this view's private position."""
        with self._lock:
            if self._writing():
                raise RuntimeError("Can't read from the ZIP file while there "
                        "is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            shared = self._file
            # Other views may have moved the shared file; reposition first.
            shared.seek(self._pos)
            chunk = shared.read(n)
            self._pos = shared.tell()
            return chunk

    def close(self):
        """Hand the file object back through the close callback, once."""
        fileobj = self._file
        if fileobj is None:
            return
        self._file = None
        self._close(fileobj)
712
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200713# Provide the tell method for unseekable stream
class _Tellable:
    """Write-through wrapper that tracks the offset of an unseekable stream."""

    def __init__(self, fp):
        self.fp = fp
        self.offset = 0

    def write(self, data):
        """Write data to the wrapped stream and advance the tracked offset."""
        written = self.fp.write(data)
        self.offset += written
        return written

    def tell(self):
        """Return the number of bytes written through this wrapper."""
        return self.offset

    def flush(self):
        self.fp.flush()

    def close(self):
        self.fp.close()
732
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200733
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.

    Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses matter:
    # "1 << 31 - 1" parses as "1 << 30" because "-" binds tighter than "<<".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Set up reading of the member described by *zipinfo*.

        fileobj       -- object whose read() yields the member's raw bytes
        mode          -- open mode; a 'U' in it enables universal newlines
        zipinfo       -- supplies compress_type, compress_size, file_size,
                         filename and (optionally) CRC
        decrypter     -- optional per-byte decryption callable
        close_fileobj -- close fileobj when this object is closed
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size   # raw bytes still to read
        self._left = zipinfo.file_size                # output bytes still due

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # read() consumed the data; push it back in front of the buffer.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Request fully satisfied from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than requested: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            # Loop only until the first non-empty chunk arrives.
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         (self._compress_left <= 0 and
                          not self._decompressor.unconsumed_tail))
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        # Read (and decrypt) raw member bytes from the underlying file:
        # at least MIN_READ_SIZE, but never past the end of the member.
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            # The file ended before the member's compressed data did.
            raise EOFError

        if self._decrypter is not None:
            data = bytes(map(self._decrypter, data))
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000999
Antoine Pitroua32f9a22010-01-27 21:18:57 +00001000
class _ZipWriteFile(io.BufferedIOBase):
    """Writable file-like object for a single new archive member.

    Data passed to write() is (optionally) compressed and written to the
    owning ZipFile's file object.  close() finalises the member: it
    records sizes and CRC on the ZipInfo, writes the data descriptor or
    rewrites the local header, and adds the member to the archive's caches.
    """

    def __init__(self, zf, zinfo, zip64):
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type)
        self._file_size = 0
        self._compress_size = 0
        self._crc = 0

    @property
    def _fileobj(self):
        # Always look the file object up on the ZipFile so that both
        # share whatever object it currently uses.
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        """Write *data* to the member; return the number of bytes consumed.

        Raises ValueError if the member has already been closed.
        """
        if self.closed:
            # Writing now would land after the finalised member data.
            raise ValueError('I/O operation on closed file.')

        # Accept any object supporting the buffer protocol; len() counts
        # items, not bytes, for e.g. array.array.
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes
        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        """Finalise the member.  Calling close() more than once is a no-op."""
        if self.closed:
            # A second close must not rewrite headers or register the
            # member twice.
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                fmt = '<LQQ' if self._zip64 else '<LLL'
                self._fileobj.write(struct.pack(fmt, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError('File size unexpectedly exceeded ZIP64 '
                                           'limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError('Compressed size unexpectedly exceeded '
                                           'ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            # Release the archive's writing flag even if finalising failed.
            self._zipfile._writing = False
1070
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001071class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001072 """ Class with methods to open, read, write, close, list zip files.
1073
Serhiy Storchaka235c5e02013-11-23 15:55:38 +02001074 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +00001075
Fred Drake3d9091e2001-03-26 15:49:24 +00001076 file: Either the path to the file, or a file-like object.
1077 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001078 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1079 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001080 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1081 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001082 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1083 needed, otherwise it will raise an exception when this would
1084 be necessary.
1085
Fred Drake3d9091e2001-03-26 15:49:24 +00001086 """
Fred Drake484d7352000-10-02 21:14:52 +00001087
Fred Drake90eac282001-02-28 05:29:34 +00001088 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001089 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001090
Serhiy Storchaka235c5e02013-11-23 15:55:38 +02001091 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001092 """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
1093 or append 'a'."""
1094 if mode not in ('r', 'w', 'x', 'a'):
1095 raise RuntimeError("ZipFile requires mode 'r', 'w', 'x', or 'a'")
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001096
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001097 _check_compression(compression)
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001098
1099 self._allowZip64 = allowZip64
1100 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +00001101 self.debug = 0 # Level of printing: 0 through 3
1102 self.NameToInfo = {} # Find file info given name
1103 self.filelist = [] # List of ZipInfo instances for archive
1104 self.compression = compression # Method of compression
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001105 self.mode = mode
Thomas Wouterscf297e42007-02-23 15:07:44 +00001106 self.pwd = None
R David Murrayf50b38a2012-04-12 18:44:58 -04001107 self._comment = b''
Tim Petersa19a1682001-03-29 04:36:09 +00001108
Fred Drake3d9091e2001-03-26 15:49:24 +00001109 # Check if we were passed a file-like object
Guido van Rossum3172c5d2007-10-16 18:12:55 +00001110 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001111 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +00001112 self._filePassed = 0
1113 self.filename = file
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001114 modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
1115 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001116 filemode = modeDict[mode]
1117 while True:
1118 try:
1119 self.fp = io.open(file, filemode)
1120 except OSError:
1121 if filemode in modeDict:
1122 filemode = modeDict[filemode]
1123 continue
Thomas Wouterscf297e42007-02-23 15:07:44 +00001124 raise
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001125 break
Fred Drake3d9091e2001-03-26 15:49:24 +00001126 else:
1127 self._filePassed = 1
1128 self.fp = file
1129 self.filename = getattr(file, 'name', None)
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001130 self._fileRefCnt = 1
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001131 self._lock = threading.RLock()
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001132 self._seekable = True
Serhiy Storchaka18ee29d2016-05-13 13:52:49 +03001133 self._writing = False
Tim Petersa19a1682001-03-29 04:36:09 +00001134
Antoine Pitrou17babc52012-11-17 23:50:08 +01001135 try:
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001136 if mode == 'r':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001137 self._RealGetContents()
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001138 elif mode in ('w', 'x'):
Georg Brandl268e4d42010-10-14 06:59:45 +00001139 # set the modified flag so central directory gets written
1140 # even if no files are added to the archive
1141 self._didModify = True
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001142 try:
1143 self.start_dir = self.fp.tell()
1144 except (AttributeError, OSError):
1145 self.fp = _Tellable(self.fp)
1146 self.start_dir = 0
1147 self._seekable = False
1148 else:
1149 # Some file-like objects can provide tell() but not seek()
1150 try:
1151 self.fp.seek(self.start_dir)
1152 except (AttributeError, OSError):
1153 self._seekable = False
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001154 elif mode == 'a':
Antoine Pitrou17babc52012-11-17 23:50:08 +01001155 try:
1156 # See if file is a zip file
1157 self._RealGetContents()
1158 # seek to start of directory and overwrite
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001159 self.fp.seek(self.start_dir)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001160 except BadZipFile:
1161 # file is not a zip file, just append
1162 self.fp.seek(0, 2)
1163
1164 # set the modified flag so central directory gets written
1165 # even if no files are added to the archive
1166 self._didModify = True
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001167 self.start_dir = self.fp.tell()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001168 else:
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001169 raise RuntimeError("Mode must be 'r', 'w', 'x', or 'a'")
Antoine Pitrou17babc52012-11-17 23:50:08 +01001170 except:
1171 fp = self.fp
1172 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001173 self._fpclose(fp)
Antoine Pitrou17babc52012-11-17 23:50:08 +01001174 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001175
Ezio Melottifaa6b7f2009-12-30 12:34:59 +00001176 def __enter__(self):
1177 return self
1178
1179 def __exit__(self, type, value, traceback):
1180 self.close()
1181
Serhiy Storchaka51a43702014-10-29 22:42:06 +02001182 def __repr__(self):
1183 result = ['<%s.%s' % (self.__class__.__module__,
1184 self.__class__.__qualname__)]
1185 if self.fp is not None:
1186 if self._filePassed:
1187 result.append(' file=%r' % self.fp)
1188 elif self.filename is not None:
1189 result.append(' filename=%r' % self.filename)
1190 result.append(' mode=%r' % self.mode)
1191 else:
1192 result.append(' [closed]')
1193 result.append('>')
1194 return ''.join(result)
1195
Tim Peters7d3bad62001-04-04 18:56:49 +00001196 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +00001197 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001198 fp = self.fp
Georg Brandl268e4d42010-10-14 06:59:45 +00001199 try:
1200 endrec = _EndRecData(fp)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001201 except OSError:
Georg Brandl4d540882010-10-28 06:42:33 +00001202 raise BadZipFile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +00001203 if not endrec:
Georg Brandl4d540882010-10-28 06:42:33 +00001204 raise BadZipFile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001205 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001206 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001207 size_cd = endrec[_ECD_SIZE] # bytes in central directory
1208 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
R David Murrayf50b38a2012-04-12 18:44:58 -04001209 self._comment = endrec[_ECD_COMMENT] # archive comment
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001210
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001211 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001212 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +00001213 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
1214 # If Zip64 extension structures are present, account for them
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001215 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
1216
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001217 if self.debug > 2:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001218 inferred = concat + offset_cd
1219 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001220 # self.start_dir: Position of start of central directory
1221 self.start_dir = offset_cd + concat
1222 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001223 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001224 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001225 total = 0
1226 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001227 centdir = fp.read(sizeCentralDir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001228 if len(centdir) != sizeCentralDir:
1229 raise BadZipFile("Truncated central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001230 centdir = struct.unpack(structCentralDir, centdir)
Serhiy Storchakad2b15272013-01-31 15:27:07 +02001231 if centdir[_CD_SIGNATURE] != stringCentralDir:
1232 raise BadZipFile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001233 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001234 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +00001235 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001236 flags = centdir[5]
1237 if flags & 0x800:
1238 # UTF-8 file names extension
1239 filename = filename.decode('utf-8')
1240 else:
1241 # Historical ZIP filename encoding
1242 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001243 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001244 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +00001245 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
1246 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001247 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001248 (x.create_version, x.create_system, x.extract_version, x.reserved,
Christian Tismer59202e52013-10-21 03:59:23 +02001249 x.flag_bits, x.compress_type, t, d,
1250 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
Martin v. Löwisd099b562012-05-01 14:08:22 +02001251 if x.extract_version > MAX_EXTRACT_VERSION:
1252 raise NotImplementedError("zip file version %.1f" %
1253 (x.extract_version / 10))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001254 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
1255 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +00001256 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001257 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Christian Tismer59202e52013-10-21 03:59:23 +02001258 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001259
1260 x._decodeExtra()
1261 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001262 self.filelist.append(x)
1263 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001264
1265 # update total bytes read from central directory
1266 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
1267 + centdir[_CD_EXTRA_FIELD_LENGTH]
1268 + centdir[_CD_COMMENT_LENGTH])
1269
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001270 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001271 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001272
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001273
1274 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001275 """Return a list of file names in the archive."""
Ezio Melotti006917e2012-04-16 21:34:24 -06001276 return [data.filename for data in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001277
1278 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +00001279 """Return a list of class ZipInfo instances for files in the
1280 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001281 return self.filelist
1282
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001283 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +00001284 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001285 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
1286 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001287 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001288 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +00001289 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
1290 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001291
1292 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001293 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001294 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001295 for zinfo in self.filelist:
1296 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001297 # Read by chunks, to avoid an OverflowError or a
1298 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001299 with self.open(zinfo.filename, "r") as f:
1300 while f.read(chunk_size): # Check CRC-32
1301 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001302 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001303 return zinfo.filename
1304
1305 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +00001306 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001307 info = self.NameToInfo.get(name)
1308 if info is None:
1309 raise KeyError(
1310 'There is no item named %r in the archive' % name)
1311
1312 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001313
Thomas Wouterscf297e42007-02-23 15:07:44 +00001314 def setpassword(self, pwd):
1315 """Set default password for encrypted files."""
R. David Murray8d855d82010-12-21 21:53:37 +00001316 if pwd and not isinstance(pwd, bytes):
1317 raise TypeError("pwd: expected bytes, got %s" % type(pwd))
1318 if pwd:
1319 self.pwd = pwd
1320 else:
1321 self.pwd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001322
R David Murrayf50b38a2012-04-12 18:44:58 -04001323 @property
1324 def comment(self):
1325 """The comment text associated with the ZIP file."""
1326 return self._comment
1327
1328 @comment.setter
1329 def comment(self, comment):
1330 if not isinstance(comment, bytes):
1331 raise TypeError("comment: expected bytes, got %s" % type(comment))
1332 # check for valid comment length
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001333 if len(comment) > ZIP_MAX_COMMENT:
1334 import warnings
1335 warnings.warn('Archive comment is too long; truncating to %d bytes'
1336 % ZIP_MAX_COMMENT, stacklevel=2)
R David Murrayf50b38a2012-04-12 18:44:58 -04001337 comment = comment[:ZIP_MAX_COMMENT]
1338 self._comment = comment
1339 self._didModify = True
1340
Thomas Wouterscf297e42007-02-23 15:07:44 +00001341 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +00001342 """Return file bytes (as a string) for name."""
Benjamin Petersond285bdb2010-10-31 17:57:22 +00001343 with self.open(name, "r", pwd) as fp:
1344 return fp.read()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001345
def open(self, name, mode="r", pwd=None, force_zip64=False):
    """Return file-like object for 'name'.

    name is a string for the file name within the ZIP file, or a ZipInfo
    object.

    mode should be 'r' to read a file already in the ZIP file, or 'w' to
    write to a file newly added to the archive.  'U' and 'rU' are accepted
    for reading but trigger a DeprecationWarning.

    pwd is the password to decrypt files (only used for reading).

    When writing, if the file size is not known in advance but may exceed
    2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
    files.  If the size is known in advance, it is best to pass a ZipInfo
    instance for name, with zinfo.file_size set.

    Raises:
        RuntimeError: unsupported mode, archive already closed, a write
            handle is still open while reading, or the member is encrypted
            and the password is missing or wrong.
        TypeError: pwd is given but is not a bytes object.
        ValueError: pwd is given together with mode 'w'.
        BadZipFile: the local file header is truncated, has a bad magic
            number, or names a different file than the central directory.
        NotImplementedError: the member uses compressed patched data
            (flag bit 5) or strong encryption (flag bit 6).
    """
    if mode not in {"r", "w", "U", "rU"}:
        raise RuntimeError('open() requires mode "r", "w", "U", or "rU"')
    if 'U' in mode:
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    if pwd and (mode == "w"):
        raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise RuntimeError(
            "Attempt to use ZIP archive that was already closed")

    # Make sure we have an info object
    if isinstance(name, ZipInfo):
        # 'name' is already an info object
        zinfo = name
    elif mode == 'w':
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
    else:
        # Get info object for name
        zinfo = self.getinfo(name)

    if mode == 'w':
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise RuntimeError("Can't read from the ZIP file while there "
                           "is an open writing handle on it. "
                           "Close the writing handle before trying to read.")

    # Open for reading.  The reference count is raised before the shared
    # handle is created; self._fpclose (passed as the close callback)
    # lowers it again.
    self._fileRefCnt += 1
    zef_file = _SharedFile(self.fp, zinfo.header_offset,
                           self._fpclose, self._lock, lambda: self._writing)
    try:
        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, fheader)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            # The extra field is read only to advance past it.
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                # Fall back to the archive-wide default password.
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, password "
                                   "required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = list(map(zd, header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                # Format the name into the message; passing it as a second
                # exception argument made str(exc) render as a tuple.
                raise RuntimeError("Bad password for file %r" % name)

        return ZipExtFile(zef_file, mode, zinfo, zd, True)
    except BaseException:
        # Close the shared handle on any failure, then re-raise unchanged.
        zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001462
def _open_to_write(self, zinfo, force_zip64=False):
    """Write the local header for *zinfo* and return a handle for its data.

    Raises ValueError when force_zip64 is requested on an archive opened
    without allowZip64, and RuntimeError when another write handle is
    already open.  On success the archive is marked as modified and as
    having an active writer, and a _ZipWriteFile is returned.
    """
    if force_zip64 and not self._allowZip64:
        raise ValueError(
            "force_zip64 is True, but allowZip64 was False when opening "
            "the ZIP file."
        )
    if self._writing:
        raise RuntimeError("Can't write to the ZIP file while there is "
                           "another write handle open on it. "
                           "Close the first handle before opening another.")

    # Placeholders: sizes and CRC are overwritten with correct data after
    # processing the file.  A caller-supplied file_size is kept as a hint.
    if not hasattr(zinfo, 'file_size'):
        zinfo.file_size = 0
    zinfo.compress_size = 0
    zinfo.CRC = 0

    zinfo.flag_bits = 0x00
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        zinfo.flag_bits |= 0x02
    if not self._seekable:
        zinfo.flag_bits |= 0x08

    if not zinfo.external_attr:
        zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

    # Compressed size can be larger than uncompressed size, hence the
    # 5% margin on the declared file size.
    if self._allowZip64:
        zip64 = force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT
    else:
        zip64 = self._allowZip64

    # New members start where the central directory currently begins.
    if self._seekable:
        self.fp.seek(self.start_dir)
    zinfo.header_offset = self.fp.tell()

    self._writecheck(zinfo)
    self._didModify = True

    local_header = zinfo.FileHeader(zip64)
    self.fp.write(local_header)

    self._writing = True
    return _ZipWriteFile(self, zinfo, zip64)
1505
def extract(self, member, path=None, pwd=None):
    """Extract one member of the archive and return the path written.

    `member' may be a filename or a ZipInfo object; a filename is resolved
    through getinfo().  The member is extracted under its full name below
    `path', which defaults to the current working directory.  `pwd' is
    forwarded to the extraction helper.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
1519
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members of the archive by calling extract() on each.

    `path' names the directory to extract into (extract() falls back to
    the current working directory when it is None).  `members' is optional
    and must be a subset of the list returned by namelist(); when omitted,
    everything in namelist() is extracted.  `pwd' is passed to each
    extract() call.
    """
    selection = self.namelist() if members is None else members
    for entry in selection:
        self.extract(entry, path, pwd)
1531
@classmethod
def _sanitize_windows_name(cls, arcname, pathsep):
    """Replace bad characters and remove trailing dots from parts.

    Each of the characters :<>|"?* is replaced by an underscore.  The name
    is then split on *pathsep*, trailing dots are stripped from every
    component, and components that end up empty are dropped before the
    name is joined again.
    """
    translation = cls._windows_illegal_name_trans_table
    if not translation:
        # Build the translation table once and cache it on the class.
        illegal = ':<>|"?*'
        translation = str.maketrans(illegal, '_' * len(illegal))
        cls._windows_illegal_name_trans_table = translation
    cleaned = arcname.translate(translation)
    kept = []
    for component in cleaned.split(pathsep):
        component = component.rstrip('.')
        if component:
            kept.append(component)
    return pathsep.join(kept)
1546
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' below the directory targetpath.

    The member name is made relative and stripped of drive letters, empty
    components, "." and ".." before being joined to targetpath.  Missing
    parent directories are created.  Returns the normalized path of the
    file or directory that was written.
    """
    sep = os.path.sep

    # Build the destination pathname, turning forward slashes (and the
    # alternative separator, if any) into the platform separator.
    arcname = member.filename.replace('/', sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, sep)

    # Interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    rejected = ('', os.path.curdir, os.path.pardir)
    components = [part for part in arcname.split(sep) if part not in rejected]
    arcname = sep.join(components)
    if sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, sep)

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if member.is_dir():
        # Directory entries carry no data; just make sure they exist.
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    with self.open(member, pwd=pwd) as source, \
            open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1585
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Warns about a duplicate member name.  Raises RuntimeError when the
    archive was not opened in mode 'w', 'x' or 'a' or is already closed,
    and LargeZipFile when ZIP64 is disallowed but the entry count, file
    size or header offset would need it.  The compression type is
    validated through _check_compression().
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ('w', 'x', 'a'):
        raise RuntimeError("write() requires mode 'w', 'x', or 'a'")
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)

    if self._allowZip64:
        # No size limits apply when ZIP64 extensions are allowed.
        return
    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        problem = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        problem = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        problem = "Zipfile size"
    else:
        return
    raise LargeZipFile(problem + " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001608
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    Directory entries are written as a bare header with no data.  Regular
    files are compressed with compress_type, falling back to the archive's
    default compression when it is None.  Raises RuntimeError if the
    archive is closed or a write handle is still open.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise RuntimeError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname)

    if zinfo.is_dir():
        zinfo.compress_size = 0
        zinfo.CRC = 0
    elif compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    if not zinfo.is_dir():
        # Regular file: stream its contents through a write handle.
        with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
            shutil.copyfileobj(src, dest, 1024*8)
        return

    with self._lock:
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        # The central directory now starts after this header.
        self.start_dir = self.fp.tell()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001650
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo stamped with the current local
    time is created; names ending in '/' get directory attributes, other
    names get ?rw------- permissions.  'compress_type', if not None,
    overrides the compression of the entry.  Raises RuntimeError if the
    archive is closed or a write handle is still open.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")

    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        now = time.localtime(time.time())[:6]
        zinfo = ZipInfo(filename=zinfo_or_arcname, date_time=now)
        zinfo.compress_type = self.compression
        if zinfo.filename[-1] == '/':
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise RuntimeError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    zinfo.file_size = len(data)            # Uncompressed size
    with self._lock, self.open(zinfo, mode='w') as dest:
        dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001686
def __del__(self):
    """Finalizer: invoke close() in case the user never did."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001690
def close(self):
    """Close the file, and for mode 'w', 'x' and 'a' write the ending
    records.

    Does nothing if the archive is already closed.  Raises RuntimeError
    while a write handle is still open.  The ending records are written
    only when the archive was modified; the underlying file reference is
    released even if writing them fails.
    """
    if self.fp is None:
        return

    if self._writing:
        raise RuntimeError("Can't close the ZIP file while there is "
                           "an open writing handle on it. "
                           "Close the writing handle before closing the zip.")

    try:
        must_finalize = self.mode in ('w', 'x', 'a') and self._didModify
        if must_finalize:
            # write ending records
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        # Detach the handle first so the archive reads as closed, then
        # hand it to the reference-counted close.
        handle, self.fp = self.fp, None
        self._fpclose(handle)
1712
def _write_end_record(self):
    """Write the central directory and the end-of-archive records.

    One central directory entry is written per member of self.filelist,
    followed by the ZIP64 end records when the entry count, directory
    offset or directory size exceeds the classic limits, then the regular
    end-of-archive record and the archive comment.  Raises LargeZipFile if
    ZIP64 records are needed but allowZip64 is false.
    """
    for zinfo in self.filelist:         # write central directory
        stamp = zinfo.date_time
        dosdate = (stamp[0] - 1980) << 9 | stamp[1] << 5 | stamp[2]
        dostime = stamp[3] << 11 | stamp[4] << 5 | (stamp[5] // 2)

        # Values that overflow the 32-bit fields move into a ZIP64 extra
        # field; the fixed fields then carry 0xffffffff instead.
        zip64_values = []
        if (zinfo.file_size > ZIP64_LIMIT
                or zinfo.compress_size > ZIP64_LIMIT):
            zip64_values.append(zinfo.file_size)
            zip64_values.append(zinfo.compress_size)
            file_size = compress_size = 0xffffffff
        else:
            file_size, compress_size = zinfo.file_size, zinfo.compress_size

        if zinfo.header_offset > ZIP64_LIMIT:
            zip64_values.append(zinfo.header_offset)
            header_offset = 0xffffffff
        else:
            header_offset = zinfo.header_offset

        extra_data = zinfo.extra
        min_version = 0
        if zip64_values:
            # Prepend a ZIP64 field to the extra's
            zip64_field = struct.pack(
                '<HH' + 'Q'*len(zip64_values),
                1, 8*len(zip64_values), *zip64_values)
            extra_data = zip64_field + extra_data
            min_version = ZIP64_VERSION

        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
        except DeprecationWarning:
            # Dump the values that were being packed before re-raising.
            print((structCentralDir, stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset), file=sys.stderr)
            raise
        for chunk in (centdir, filename, extra_data, zinfo.comment):
            self.fp.write(chunk)

    pos2 = self.fp.tell()
    # Write end-of-zip-archive record
    centDirCount = len(self.filelist)
    centDirSize = pos2 - self.start_dir
    centDirOffset = self.start_dir

    if centDirCount > ZIP_FILECOUNT_LIMIT:
        requires_zip64 = "Files count"
    elif centDirOffset > ZIP64_LIMIT:
        requires_zip64 = "Central directory offset"
    elif centDirSize > ZIP64_LIMIT:
        requires_zip64 = "Central directory size"
    else:
        requires_zip64 = None

    if requires_zip64:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
        zip64endrec = struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, centDirCount, centDirCount,
            centDirSize, centDirOffset)
        self.fp.write(zip64endrec)

        zip64locrec = struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, pos2, 1)
        self.fp.write(zip64locrec)
        # Clamp the values for the classic end record's narrower fields.
        centDirCount = min(centDirCount, 0xFFFF)
        centDirSize = min(centDirSize, 0xFFFFFFFF)
        centDirOffset = min(centDirOffset, 0xFFFFFFFF)

    endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, centDirCount, centDirCount,
                         centDirSize, centDirOffset, len(self._comment))
    self.fp.write(endrec)
    self.fp.write(self._comment)
    self.fp.flush()
1813
def _fpclose(self, fp):
    """Drop one reference to the archive file and close it when unused.

    *fp* is closed only once the reference count reaches zero and the
    archive's _filePassed flag is false.
    """
    assert self._fileRefCnt > 0
    self._fileRefCnt -= 1
    if self._fileRefCnt or self._filePassed:
        return
    fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001819
1820
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember the optimize level.

        optimize=-1 selects the legacy behaviour of _get_codename (use
        whatever compiled file is present); 0, 1 or 2 select a specific
        optimization level.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.
        If filterfunc(pathname) is given, it is called with every argument.
        When it is False, the file or directory is skipped.
        """
        def skipped(path):
            # True when filterfunc rejects a module file.
            if filterfunc and not filterfunc(path):
                if self.debug:
                    print('file "%s" skipped by filterfunc' % path)
                return True
            return False

        def add_module(source_path, prefix, label):
            # Resolve the compiled file for a .py path and store it.
            fname, arcname = self._get_codename(source_path[0:-3], prefix)
            if self.debug:
                print(label, arcname)
            self.write(fname, arcname)

        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s "%s" skipped by filterfunc' % (label, pathname))
            return

        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            add_module(pathname, basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if ext != ".py" or skipped(path):
                    continue
                add_module(path, basename, "Adding")
            return

        # This is a package directory, add it
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        add_module(initname, basename, "Adding")

        dirlist = os.listdir(pathname)
        dirlist.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            ext = os.path.splitext(filename)[1]
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename,
                                 filterfunc=filterfunc)  # Recursive call
            elif ext == ".py":
                if skipped(path):
                    continue
                add_module(path, basename, "Adding")

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).
        If compilation fails, the .py source itself is returned.
        """
        def _compile(file, optimize=-1):
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)

        def up_to_date(candidate):
            # A compiled file counts only if it exists and is at least as
            # new as the source.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        # Whatever compiled file is used, it is stored under the legacy
        # pyc file name in the archive.
        arcname = file_pyc
        if self._optimize == -1:
            # legacy mode: use whatever file is present
            candidates = (file_pyc, pycache_opt0, pycache_opt1, pycache_opt2)
            for candidate in candidates:
                if up_to_date(candidate):
                    fname = candidate
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    if sys.flags.optimize == 0:
                        fname = pycache_opt0
                    elif sys.flags.optimize == 1:
                        fname = pycache_opt1
                    else:
                        fname = pycache_opt2
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_opt0
            elif self._optimize == 1:
                fname = pycache_opt1
            elif self._optimize == 2:
                fname = pycache_opt2
            else:
                msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                raise ValueError(msg)
            if not up_to_date(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py

        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001992
1993
def main(args=None):
    """Run the zipfile command-line interface.

    args is the list of command-line arguments without the program name;
    when it is None, sys.argv[1:] is used.  The first argument selects the
    action:

        -l zipfile.zip          call printdir() on the archive
        -t zipfile.zip          call testzip() and report the name it returns
        -e zipfile.zip target   call extractall(target) on the archive
        -c zipfile.zip src ...  create the archive from the given sources

    If the action is missing or unknown, or the number of arguments does
    not fit the action, the usage text is printed to stdout and
    SystemExit(1) is raised.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    def usage_error():
        # Every kind of misuse is reported the same way: usage text on
        # stdout, then exit status 1.
        print(USAGE)
        sys.exit(1)

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_error()

    if args[0] == '-l':
        if len(args) != 2:
            usage_error()
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif args[0] == '-t':
        if len(args) != 2:
            usage_error()
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        # Printed whether or not a corrupted member was reported.
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            usage_error()

        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif args[0] == '-c':
        if len(args) < 3:
            usage_error()

        def add_to_zip(zf, path, zippath):
            # Add path to zf under the archive name zippath, descending
            # into directories.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                # An empty zippath means "archive root": there is no entry
                # to write for the directory itself.
                if zippath:
                    zf.write(path, zippath)
                # os.listdir() returns names in arbitrary order; sort them
                # so the same tree always yields the same member order.
                for nm in sorted(os.listdir(path)):
                    add_to_zip(zf,
                               os.path.join(path, nm),
                               os.path.join(zippath, nm))
            # Anything that is neither a file nor a directory is ignored.

        with ZipFile(args[1], 'w') as zf:
            for path in args[2:]:
                zippath = os.path.basename(path)
                if not zippath:
                    # A trailing separator leaves an empty basename; use
                    # the name of the directory in front of it instead.
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    # '.', '..' and empty names are not usable archive
                    # names; put the contents at the archive root.
                    zippath = ''
                add_to_zip(zf, path, zippath)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002059
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()