"""
Read and write ZIP files.

XXX references to utf-8 need further investigation.
"""
import io
import os
import re
import importlib.util
import sys
import time
import stat
import shutil
import struct
import binascii

# threading is optional; fall back to the dummy implementation when missing.
try:
    import threading
except ImportError:
    import dummy_threading as threading

# Each compression backend is optional: when a module is missing its name is
# bound to None so later code can test for availability.
try:
    import zlib # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    crc32 = binascii.crc32

try:
    import bz2 # We may need its compression method
except ImportError:
    bz2 = None

try:
    import lzma # We may need its compression method
except ImportError:
    lzma = None
38
# Public names exported by "from <module> import *".
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000042
class BadZipFile(Exception):
    """Raised when an archive cannot be handled as a ZIP file."""
    pass


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """

error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names
54
Guido van Rossum32abe6f2000-03-31 17:30:02 +000055
# Sizes above ZIP64_LIMIT require the ZIP64 extensions when writing a
# file header.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "version needed to extract" values written into headers
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020073
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9
98
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
125
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
144
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
166
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    *fp* is an open, seekable binary file object.  An OSError raised while
    probing the file is treated as "not a ZIP file" and yields False.
    """
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except OSError:
        pass
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000174
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.

    Returns False (rather than raising) when the file cannot be opened or
    read because of an OSError.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            # Already a file-like object: probe it directly, leave it open.
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except OSError:
        pass
    return result
190
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is the open archive, offset is the position of the regular
    "end of central directory" record relative to the end of the file
    (so it is negative), and endrec is the list built from that record.

    endrec is returned unchanged when no ZIP64 locator/record is found
    immediately before the regular record; otherwise its fields are
    overwritten in place with the ZIP64 values.  BadZipFile is raised for
    archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
232
233
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record (the eight fixed fields plus the archive comment) followed by a
    tenth item, the file seek offset of this record.

    None is returned when no record can be located."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
        data[0:4] == stringEndArchive and
        data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec=list(endrec)

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000293
Fred Drake484d7352000-10-02 21:14:52 +0000294
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named *filename* with modification *date_time*.

        date_time is a (year, month, day, hour, min, sec) sequence; a year
        before 1980 raises ValueError.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Default the sizes so that repr() works on a freshly built instance.
        self.compress_size = 0          # Size of the compressed file
        self.file_size = 0              # Size of the uncompressed file
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file

    def __repr__(self):
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # High 16 bits are shown as a file mode, low 16 bits as raw hex.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        When zip64 is None the ZIP64 extra record is added only if a size
        exceeds ZIP64_LIMIT.  LargeZipFile is raised when a size exceeds
        the limit but zip64 is false.  As a side effect, extract_version
        and create_version are raised to the minimum version required.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        The name is encoded as ASCII when possible; otherwise as UTF-8 with
        flag bit 0x800 set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply ZIP64 values from the extra field to the size/offset fields.

        Only records of type 1 are interpreted; a field whose current value
        is the 0xffffffff placeholder is replaced by the next 64-bit value
        from the record.  RuntimeError is raised for an unexpected record
        length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx+=1

            # Advance past this record (4-byte header plus payload).
            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if the entry name ends with '/'."""
        # endswith() instead of [-1] so an empty name does not raise.
        return self.filename.endswith('/')
506
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000507
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    @staticmethod
    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = [0] * 256
        for i in range(256):
            crc = i
            for j in range(8):
                if crc & 1:
                    crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
                else:
                    crc = ((crc >> 1) & 0x7FFFFFFF)
            table[i] = crc
        return table
    # Built lazily by the first instance and then shared by all instances.
    crctable = None

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]

    def __init__(self, pwd):
        """Initialise the three keys and mix in each byte of *pwd*.

        Iterating pwd must yield ints (e.g. a bytes object).
        """
        if _ZipDecrypter.crctable is None:
            _ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable()
        self.key0 = 305419896
        self.key1 = 591751049
        self.key2 = 878082192
        for p in pwd:
            self._UpdateKeys(p)

    def _UpdateKeys(self, c):
        """Advance the three keys using byte value *c*."""
        self.key0 = self._crc32(c, self.key0)
        self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
        self.key1 = (self.key1 * 134775813 + 1) & 4294967295
        self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        c = c ^ (((k * (k^1)) >> 8) & 255)
        # The keys are updated with the decrypted byte.
        self._UpdateKeys(c)
        return c
567
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200568
class LZMACompressor:
    """Raw LZMA1 compressor whose output starts with a small properties header.

    The underlying lzma compressor is created lazily on the first call to
    compress() or flush(); that first call's output is prefixed with the
    bytes (9, 4), the little-endian 16-bit length of the encoded filter
    properties, and the properties themselves.
    """

    def __init__(self):
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the header bytes."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
            lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        ])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*, emitting the header first if not yet written."""
        if self._comp is None:
            return self._init() + self._comp.compress(data)
        return self._comp.compress(data)

    def flush(self):
        """Finish the stream, emitting the header first if not yet written."""
        if self._comp is None:
            return self._init() + self._comp.flush()
        return self._comp.flush()
590
591
class LZMADecompressor:
    """Decompressor for raw LZMA1 data preceded by a properties header.

    Input is buffered until more than the 4-byte header plus the
    properties (whose length is read from header bytes 2-3) has arrived;
    the underlying lzma decompressor is then built from those properties.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Feed *data* and return whatever could be decompressed so far.

        Returns b'' while the header is still incomplete.
        """
        if self._decomp is None:
            self._unconsumed += data
            if len(self._unconsumed) <= 4:
                return b''
            psize, = struct.unpack('<H', self._unconsumed[2:4])
            if len(self._unconsumed) <= 4 + psize:
                return b''

            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
                lzma._decode_filter_properties(lzma.FILTER_LZMA1,
                                               self._unconsumed[4:4 + psize])
            ])
            # Everything after the header is compressed payload.
            data = self._unconsumed[4 + psize:]
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
618
619
# Human-readable names for ZIP compression method numbers, used when
# describing an entry or reporting an unsupported method.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
639
def _check_compression(compression):
    """Raise RuntimeError if *compression* cannot be used.

    That is the case when the method is not one of the four supported
    constants, or when the module implementing it failed to import.
    """
    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
    else:
        raise RuntimeError("That compression method is not supported")
657
658
def _get_compressor(compress_type):
    """Return a new compressor object for *compress_type*, or None.

    None is returned for any method other than deflate, bzip2 and lzma.
    """
    if compress_type == ZIP_DEFLATED:
        # Negative window bits: raw deflate stream without zlib header.
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    elif compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    elif compress_type == ZIP_LZMA:
        return LZMACompressor()
    else:
        return None
669
670
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    Returns None for stored (uncompressed) entries and raises
    NotImplementedError for any method other than deflate, bzip2 and lzma.
    """
    if compress_type == ZIP_STORED:
        return None
    elif compress_type == ZIP_DEFLATED:
        # Negative window bits: raw deflate stream without zlib header.
        return zlib.decompressobj(-15)
    elif compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    elif compress_type == ZIP_LZMA:
        return LZMADecompressor()
    else:
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
        else:
            raise NotImplementedError("compression type %d" % (compress_type,))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200686
687
class _SharedFile:
    """Reader with its own position on top of a shared file object.

    file  -- the underlying file object.
    pos   -- offset in *file* at which this reader starts.
    close -- callable invoked with *file* when this reader is closed.
    lock  -- context manager held around every seek/read/tell sequence.
    """

    def __init__(self, file, pos, close, lock):
        self._file, self._pos = file, pos
        self._close, self._lock = close, lock

    def read(self, n=-1):
        """Read up to n bytes from this reader's own position."""
        with self._lock:
            shared = self._file
            # Reposition first: the shared file's offset is not ours.
            shared.seek(self._pos)
            chunk = shared.read(n)
            self._pos = shared.tell()
        return chunk

    def close(self):
        """Hand the file to the close callback; later calls do nothing."""
        shared, self._file = self._file, None
        if shared is not None:
            self._close(shared)
707
Serhiy Storchaka77d89972015-03-23 01:09:35 +0200708# Provide the tell method for unseekable stream
class _Tellable:
    """Wrapper that supplies tell() by counting the bytes written.

    The count starts at 0 when the wrapper is created and grows by the
    value returned from each call to the wrapped object's write().
    """

    def __init__(self, fp):
        self.fp = fp
        self.offset = 0

    def write(self, data):
        """Write data to the wrapped object and return its result."""
        written = self.fp.write(data)
        self.offset += written
        return written

    def tell(self):
        """Return the running total of bytes reported as written."""
        return self.offset

    def flush(self):
        """Flush the wrapped object."""
        self.fp.flush()

    def close(self):
        """Close the wrapped object."""
        self.fp.close()
727
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200728
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # Parenthesised on purpose: "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Set up a reader for one archive member.

        fileobj       -- object whose read() yields the member's raw data.
        mode          -- open mode; a 'U' in it enables universal newlines.
        zipinfo       -- supplies compress_type, compress_size, file_size,
                         filename and (optionally) CRC for the member.
        decrypter     -- optional callable applied to every raw byte.
        close_fileobj -- if true, close() also closes fileobj.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size    # raw bytes still unread
        self._left = zipinfo.file_size                 # output bytes still due

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0        # read position inside _readbuffer

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            # No reference value: _update_crc() becomes a no-op.
            self._expected_crc = None

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                # Every newline flavour is reported to the caller as b'\n'.
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # read() consumed the bytes; put them back in front of whatever
            # is still buffered so the position is effectively unchanged.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: this object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Request is fully satisfied from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        # Drain the buffer, then keep reading until n bytes or EOF.
        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than asked for: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        """Fold newdata into the running CRC and verify it at end of file.

        Raises BadZipFile if the stream is at EOF and the CRC differs from
        the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n=-1):
        """Read up to n bytes with at most one read() system call.

        If n is omitted, None, or negative, return whatever is buffered
        plus the first non-empty chunk obtained from the stream.
        """

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                # Loop only to skip empty results; stop at the first data.
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    # Got more than asked for: keep the surplus buffered.
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        """Read raw member bytes from the file object and decrypt them.

        Reads at least MIN_READ_SIZE bytes when that many remain, never
        more than the member's remaining compressed size.  Raises EOFError
        if the file object returns no data while some is still expected.
        """
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = bytes(map(self._decrypter, data))
        return data

    def close(self):
        """Close this reader, and the file object too if it was requested."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000994
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000995
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000996class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000997 """ Class with methods to open, read, write, close, list zip files.
998
Serhiy Storchaka235c5e02013-11-23 15:55:38 +0200999 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +00001000
Fred Drake3d9091e2001-03-26 15:49:24 +00001001 file: Either the path to the file, or a file-like object.
1002 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001003 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1004 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001005 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1006 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001007 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1008 needed, otherwise it will raise an exception when this would
1009 be necessary.
1010
Fred Drake3d9091e2001-03-26 15:49:24 +00001011 """
Fred Drake484d7352000-10-02 21:14:52 +00001012
Fred Drake90eac282001-02-28 05:29:34 +00001013 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001014 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001015
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
    """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
    or append 'a'.

    file may be a path string, which is opened here, or any other object,
    which is used as the file object directly.  Raises RuntimeError for an
    unknown mode.  If setting up the archive fails after the file object
    has been obtained, the file object is released through _fpclose() and
    the exception is re-raised.
    """
    if mode not in ('r', 'w', 'x', 'a'):
        raise RuntimeError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = mode
    self.pwd = None
    self._comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        # Maps the ZipFile mode to the first file mode to try, and each
        # file mode to the fallback tried when opening with it fails.
        modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                    'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
        filemode = modeDict[mode]
        while True:
            try:
                self.fp = io.open(file, filemode)
            except OSError:
                if filemode in modeDict:
                    # Retry with the next fallback mode in the chain.
                    filemode = modeDict[filemode]
                    continue
                # No fallback left ('rb', 'wb', 'xb'): give up.
                raise
            break
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    self._fileRefCnt = 1
    self._lock = threading.RLock()
    self._seekable = True

    try:
        if mode == 'r':
            self._RealGetContents()
        elif mode in ('w', 'x'):
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
            try:
                self.start_dir = self.fp.tell()
            except (AttributeError, OSError):
                # No usable tell(): wrap the stream so offsets are counted.
                self.fp = _Tellable(self.fp)
                self.start_dir = 0
                self._seekable = False
            else:
                # Some file-like objects can provide tell() but not seek()
                try:
                    self.fp.seek(self.start_dir)
                except (AttributeError, OSError):
                    self._seekable = False
        elif mode == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # NOTE(review): nesting of the next two statements inside
                # this except block was reconstructed - confirm.
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                self.start_dir = self.fp.tell()
        else:
            raise RuntimeError("Mode must be 'r', 'w', 'x', or 'a'")
    except:
        # Deliberately bare: release the file on any failure, then re-raise.
        fp = self.fp
        self.fp = None
        self._fpclose(fp)
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001099
def __enter__(self):
    """Return this object itself as the value bound by a with statement."""
    return self
1102
def __exit__(self, type, value, traceback):
    """Call close() when the with block is left; the arguments are unused."""
    self.close()
1105
def __repr__(self):
    """Return '<module.Class ...>' describing the file and mode.

    Shows ' [closed]' when self.fp is None; otherwise shows the file
    object (if one was passed in) or the filename (if known), then the
    mode.
    """
    cls = self.__class__
    prefix = '<%s.%s' % (cls.__module__, cls.__qualname__)
    if self.fp is None:
        return prefix + ' [closed]' + '>'

    pieces = [prefix]
    if self._filePassed:
        pieces.append(' file=%r' % self.fp)
    elif self.filename is not None:
        pieces.append(' filename=%r' % self.filename)
    pieces.append(' mode=%r' % self.mode)
    pieces.append('>')
    return ''.join(pieces)
1119
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Sets self._comment and self.start_dir, and adds one ZipInfo per
    central directory entry to self.filelist and self.NameToInfo.

    Raises BadZipFile if no end-of-archive record is found or the central
    directory is truncated or has a bad signature, and NotImplementedError
    if an entry needs a newer version than MAX_EXTRACT_VERSION.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # From here on "fp" is an in-memory copy of the central directory.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if len(centdir) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Index 5 is the field unpacked into x.flag_bits further down.
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        # d packs year-1980 / month / day; t packs hour / minute / sec-over-2.
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                        t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        # Shift by "concat" so the offset is relative to the real file start.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        # NOTE(review): placement of this debug print inside the loop was
        # reconstructed - confirm.
        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001196
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001197
def namelist(self):
    """Return a new list with the filename of every entry in self.filelist."""
    return [member.filename for member in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001201
def infolist(self):
    """Return the list of ZipInfo instances for the archive's files.

    This is self.filelist itself, not a copy.
    """
    return self.filelist
1206
def printdir(self, file=None):
    """Print a header line and one line per archive member.

    Each member line shows the filename, the modification time taken from
    date_time, and file_size.  Output goes to *file*, which is passed on
    to print() unchanged.
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001215
def testzip(self):
    """Read all the files and check the CRC.

    Return the filename of the first member whose read raises BadZipFile,
    or None if every member reads cleanly.
    """
    block_size = 2 ** 20
    for member in self.filelist:
        try:
            # Read by chunks, to avoid an OverflowError or a
            # MemoryError with very large embedded files.
            with self.open(member.filename, "r") as stream:
                while stream.read(block_size):     # Check CRC-32
                    pass
        except BadZipFile:
            return member.filename
    return None
1228
def getinfo(self, name):
    """Return the ZipInfo stored under 'name' in self.NameToInfo.

    Raises KeyError if there is no such entry.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001237
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A false value (None, b'') clears the password.  Raises TypeError if a
    non-empty pwd is not a bytes object.
    """
    if not pwd:
        self.pwd = None
        return
    if not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    self.pwd = pwd
Thomas Wouterscf297e42007-02-23 15:07:44 +00001246
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Only bytes are accepted; anything else is rejected up front.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment))
    # check for valid comment length
    too_long = len(comment) > ZIP_MAX_COMMENT
    if too_long:
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT, stacklevel=2)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    # Mark the archive as changed.
    self._didModify = True
1264
def read(self, name, pwd=None):
    """Return everything read from member 'name', opened with mode "r".

    name and pwd are passed on to open() unchanged.
    """
    with self.open(name, "r", pwd) as member:
        payload = member.read()
    return payload
Guido van Rossumd8faa362007-04-27 19:54:29 +00001269
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name may be a member name or a ZipInfo instance.  mode must be "r",
    "U" or "rU"; any mode containing 'U' emits a DeprecationWarning.
    pwd, if given, must be bytes; self.pwd is used when it is not given.

    Raises RuntimeError for a bad mode, a closed archive, a missing
    password or a wrong password; TypeError for a non-bytes pwd;
    BadZipFile for a truncated or inconsistent local file header; and
    NotImplementedError for patched data or strong encryption.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if 'U' in mode:
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Make sure we have an info object
    if isinstance(name, ZipInfo):
        # 'name' is already an info object
        zinfo = name
    else:
        # Get info object for name
        zinfo = self.getinfo(name)

    # The new reader shares self.fp; count it before handing the file out.
    self._fileRefCnt += 1
    zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose, self._lock)
    try:
        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, fheader)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            # Read and discard the extra field to reach the member data.
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        # The local header must name the same file as the directory entry.
        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, password "
                                   "required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = list(map(zd, header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the high byte of the raw file time
                # (zinfo._raw_time) when flag bit 3 is set
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd, True)
    except:
        # Deliberately bare: release the shared reader on any failure,
        # then re-raise the original exception.
        zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001357
Christian Heimes790c8232008-01-07 21:14:23 +00001358 def extract(self, member, path=None, pwd=None):
1359 """Extract a member from the archive to the current working directory,
1360 using its full name. Its file information is extracted as accurately
1361 as possible. `member' may be a filename or a ZipInfo object. You can
1362 specify a different directory using `path'.
1363 """
1364 if not isinstance(member, ZipInfo):
1365 member = self.getinfo(member)
1366
1367 if path is None:
1368 path = os.getcwd()
1369
1370 return self._extract_member(member, path, pwd)
1371
1372 def extractall(self, path=None, members=None, pwd=None):
1373 """Extract all members from the archive to the current working
1374 directory. `path' specifies a different directory to extract to.
1375 `members' is optional and must be a subset of the list returned
1376 by namelist().
1377 """
1378 if members is None:
1379 members = self.namelist()
1380
1381 for zipinfo in members:
1382 self.extract(zipinfo, path, pwd)
1383
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001384 @classmethod
1385 def _sanitize_windows_name(cls, arcname, pathsep):
1386 """Replace bad characters and remove trailing dots from parts."""
1387 table = cls._windows_illegal_name_trans_table
1388 if not table:
1389 illegal = ':<>|"?*'
1390 table = str.maketrans(illegal, '_' * len(illegal))
1391 cls._windows_illegal_name_trans_table = table
1392 arcname = arcname.translate(table)
1393 # remove trailing dots
1394 arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1395 # rejoin, removing empty parts.
1396 arcname = pathsep.join(x for x in arcname if x)
1397 return arcname
1398
Christian Heimes790c8232008-01-07 21:14:23 +00001399 def _extract_member(self, member, targetpath, pwd):
1400 """Extract the ZipInfo object 'member' to a physical
1401 file on the path targetpath.
1402 """
1403 # build the destination pathname, replacing
1404 # forward slashes to platform specific separators.
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001405 arcname = member.filename.replace('/', os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001406
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001407 if os.path.altsep:
1408 arcname = arcname.replace(os.path.altsep, os.path.sep)
1409 # interpret absolute pathname as relative, remove drive letter or
1410 # UNC path, redundant separators, "." and ".." components.
1411 arcname = os.path.splitdrive(arcname)[1]
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001412 invalid_path_parts = ('', os.path.curdir, os.path.pardir)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001413 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001414 if x not in invalid_path_parts)
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001415 if os.path.sep == '\\':
Serhiy Storchakae5e64442013-02-02 19:50:59 +02001416 # filter illegal characters on Windows
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001417 arcname = self._sanitize_windows_name(arcname, os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001418
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001419 targetpath = os.path.join(targetpath, arcname)
Christian Heimes790c8232008-01-07 21:14:23 +00001420 targetpath = os.path.normpath(targetpath)
1421
1422 # Create all upper directories if necessary.
1423 upperdirs = os.path.dirname(targetpath)
1424 if upperdirs and not os.path.exists(upperdirs):
1425 os.makedirs(upperdirs)
1426
Serhiy Storchaka503f9082016-02-08 00:02:25 +02001427 if member.is_dir():
Martin v. Löwis70ccd162009-05-24 19:47:22 +00001428 if not os.path.isdir(targetpath):
1429 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001430 return targetpath
1431
Antoine Pitrou17babc52012-11-17 23:50:08 +01001432 with self.open(member, pwd=pwd) as source, \
1433 open(targetpath, "wb") as target:
1434 shutil.copyfileobj(source, target)
Christian Heimes790c8232008-01-07 21:14:23 +00001435
1436 return targetpath
1437
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001438 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001439 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001440 if zinfo.filename in self.NameToInfo:
Serhiy Storchaka9b7a1a12014-01-20 21:57:40 +02001441 import warnings
1442 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001443 if self.mode not in ('w', 'x', 'a'):
1444 raise RuntimeError("write() requires mode 'w', 'x', or 'a'")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001445 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001446 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02001447 "Attempt to write ZIP archive that was already closed")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001448 _check_compression(zinfo.compress_type)
Serhiy Storchakacfbb3942014-09-23 21:34:24 +03001449 if not self._allowZip64:
1450 requires_zip64 = None
1451 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1452 requires_zip64 = "Files count"
1453 elif zinfo.file_size > ZIP64_LIMIT:
1454 requires_zip64 = "Filesize"
1455 elif zinfo.header_offset > ZIP64_LIMIT:
1456 requires_zip64 = "Zipfile size"
1457 if requires_zip64:
1458 raise LargeZipFile(requires_zip64 +
1459 " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001460
1461 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +00001462 """Put the bytes from filename into the archive under the name
1463 arcname."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001464 if not self.fp:
1465 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02001466 "Attempt to write to ZIP archive that was already closed")
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001467
Serhiy Storchaka503f9082016-02-08 00:02:25 +02001468 zinfo = ZipInfo.from_file(filename, arcname)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001469
Serhiy Storchaka503f9082016-02-08 00:02:25 +02001470 if zinfo.is_dir():
1471 zinfo.compress_size = 0
1472 zinfo.CRC = 0
1473 else:
1474 if compress_type is not None:
1475 zinfo.compress_type = compress_type
1476 else:
1477 zinfo.compress_type = self.compression
1478
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001479 with self._lock:
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001480 if self._seekable:
1481 self.fp.seek(self.start_dir)
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001482 zinfo.header_offset = self.fp.tell() # Start of header bytes
1483 if zinfo.compress_type == ZIP_LZMA:
1484 # Compressed data includes an end-of-stream (EOS) marker
1485 zinfo.flag_bits |= 0x02
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001486
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001487 self._writecheck(zinfo)
1488 self._didModify = True
Martin v. Löwis59e47792009-01-24 14:10:07 +00001489
Serhiy Storchaka503f9082016-02-08 00:02:25 +02001490 if zinfo.is_dir():
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001491 self.filelist.append(zinfo)
1492 self.NameToInfo[zinfo.filename] = zinfo
1493 self.fp.write(zinfo.FileHeader(False))
1494 self.start_dir = self.fp.tell()
1495 return
1496
1497 cmpr = _get_compressor(zinfo.compress_type)
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001498 if not self._seekable:
1499 zinfo.flag_bits |= 0x08
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001500 with open(filename, "rb") as fp:
1501 # Must overwrite CRC and sizes with correct data later
1502 zinfo.CRC = CRC = 0
1503 zinfo.compress_size = compress_size = 0
1504 # Compressed size can be larger than uncompressed size
1505 zip64 = self._allowZip64 and \
1506 zinfo.file_size * 1.05 > ZIP64_LIMIT
1507 self.fp.write(zinfo.FileHeader(zip64))
1508 file_size = 0
1509 while 1:
1510 buf = fp.read(1024 * 8)
1511 if not buf:
1512 break
1513 file_size = file_size + len(buf)
Martin Panterb82032f2015-12-11 05:19:29 +00001514 CRC = crc32(buf, CRC)
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001515 if cmpr:
1516 buf = cmpr.compress(buf)
1517 compress_size = compress_size + len(buf)
1518 self.fp.write(buf)
1519 if cmpr:
1520 buf = cmpr.flush()
1521 compress_size = compress_size + len(buf)
1522 self.fp.write(buf)
1523 zinfo.compress_size = compress_size
1524 else:
1525 zinfo.compress_size = file_size
1526 zinfo.CRC = CRC
1527 zinfo.file_size = file_size
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001528 if zinfo.flag_bits & 0x08:
1529 # Write CRC and file sizes after the file data
1530 fmt = '<LQQ' if zip64 else '<LLL'
1531 self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
1532 zinfo.file_size))
1533 self.start_dir = self.fp.tell()
1534 else:
1535 if not zip64 and self._allowZip64:
1536 if file_size > ZIP64_LIMIT:
1537 raise RuntimeError('File size has increased during compressing')
1538 if compress_size > ZIP64_LIMIT:
1539 raise RuntimeError('Compressed size larger than uncompressed size')
1540 # Seek backwards and write file header (which will now include
1541 # correct CRC and file sizes)
1542 self.start_dir = self.fp.tell() # Preserve current position in file
1543 self.fp.seek(zinfo.header_offset)
1544 self.fp.write(zinfo.FileHeader(zip64))
1545 self.fp.seek(self.start_dir)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001546 self.filelist.append(zinfo)
1547 self.NameToInfo[zinfo.filename] = zinfo
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001548
Ronald Oussorenee5c8852010-02-07 20:24:02 +00001549 def writestr(self, zinfo_or_arcname, data, compress_type=None):
Guido van Rossum85825dc2007-08-27 17:03:28 +00001550 """Write a file into the archive. The contents is 'data', which
1551 may be either a 'str' or a 'bytes' instance; if it is a 'str',
1552 it is encoded as UTF-8 first.
1553 'zinfo_or_arcname' is either a ZipInfo instance or
Just van Rossumb083cb32002-12-12 12:23:32 +00001554 the name of the file in the archive."""
Guido van Rossum85825dc2007-08-27 17:03:28 +00001555 if isinstance(data, str):
1556 data = data.encode("utf-8")
Just van Rossumb083cb32002-12-12 12:23:32 +00001557 if not isinstance(zinfo_or_arcname, ZipInfo):
1558 zinfo = ZipInfo(filename=zinfo_or_arcname,
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001559 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001560 zinfo.compress_type = self.compression
Serhiy Storchaka46a34922014-09-23 22:40:23 +03001561 if zinfo.filename[-1] == '/':
1562 zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x
1563 zinfo.external_attr |= 0x10 # MS-DOS directory flag
1564 else:
1565 zinfo.external_attr = 0o600 << 16 # ?rw-------
Just van Rossumb083cb32002-12-12 12:23:32 +00001566 else:
1567 zinfo = zinfo_or_arcname
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001568
1569 if not self.fp:
1570 raise RuntimeError(
Christian Tismer59202e52013-10-21 03:59:23 +02001571 "Attempt to write to ZIP archive that was already closed")
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001572
Guido van Rossum85825dc2007-08-27 17:03:28 +00001573 zinfo.file_size = len(data) # Uncompressed size
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001574 with self._lock:
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001575 if self._seekable:
Serhiy Storchakaa14f7d22015-01-26 14:01:27 +02001576 self.fp.seek(self.start_dir)
Serhiy Storchakaa14f7d22015-01-26 14:01:27 +02001577 zinfo.header_offset = self.fp.tell() # Start of header data
1578 if compress_type is not None:
1579 zinfo.compress_type = compress_type
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001580 zinfo.header_offset = self.fp.tell() # Start of header data
1581 if compress_type is not None:
1582 zinfo.compress_type = compress_type
1583 if zinfo.compress_type == ZIP_LZMA:
1584 # Compressed data includes an end-of-stream (EOS) marker
1585 zinfo.flag_bits |= 0x02
Ronald Oussorenee5c8852010-02-07 20:24:02 +00001586
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001587 self._writecheck(zinfo)
1588 self._didModify = True
Martin Panterb82032f2015-12-11 05:19:29 +00001589 zinfo.CRC = crc32(data) # CRC-32 checksum
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001590 co = _get_compressor(zinfo.compress_type)
1591 if co:
1592 data = co.compress(data) + co.flush()
1593 zinfo.compress_size = len(data) # Compressed size
1594 else:
1595 zinfo.compress_size = zinfo.file_size
1596 zip64 = zinfo.file_size > ZIP64_LIMIT or \
1597 zinfo.compress_size > ZIP64_LIMIT
1598 if zip64 and not self._allowZip64:
1599 raise LargeZipFile("Filesize would require ZIP64 extensions")
1600 self.fp.write(zinfo.FileHeader(zip64))
1601 self.fp.write(data)
1602 if zinfo.flag_bits & 0x08:
1603 # Write CRC and file sizes after the file data
1604 fmt = '<LQQ' if zip64 else '<LLL'
1605 self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
1606 zinfo.file_size))
1607 self.fp.flush()
1608 self.start_dir = self.fp.tell()
1609 self.filelist.append(zinfo)
1610 self.NameToInfo[zinfo.filename] = zinfo
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001611
1612 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001613 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001614 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001615
1616 def close(self):
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001617 """Close the file, and for mode 'w', 'x' and 'a' write the ending
Fred Drake484d7352000-10-02 21:14:52 +00001618 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001619 if self.fp is None:
1620 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001621
Antoine Pitrou17babc52012-11-17 23:50:08 +01001622 try:
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001623 if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001624 with self._lock:
Serhiy Storchaka77d89972015-03-23 01:09:35 +02001625 if self._seekable:
Serhiy Storchakaa14f7d22015-01-26 14:01:27 +02001626 self.fp.seek(self.start_dir)
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001627 self._write_end_record()
Antoine Pitrou17babc52012-11-17 23:50:08 +01001628 finally:
1629 fp = self.fp
1630 self.fp = None
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001631 self._fpclose(fp)
1632
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001633 def _write_end_record(self):
Serhiy Storchakaf15e5242015-01-26 13:53:38 +02001634 for zinfo in self.filelist: # write central directory
1635 dt = zinfo.date_time
1636 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1637 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1638 extra = []
1639 if zinfo.file_size > ZIP64_LIMIT \
1640 or zinfo.compress_size > ZIP64_LIMIT:
1641 extra.append(zinfo.file_size)
1642 extra.append(zinfo.compress_size)
1643 file_size = 0xffffffff
1644 compress_size = 0xffffffff
1645 else:
1646 file_size = zinfo.file_size
1647 compress_size = zinfo.compress_size
1648
1649 if zinfo.header_offset > ZIP64_LIMIT:
1650 extra.append(zinfo.header_offset)
1651 header_offset = 0xffffffff
1652 else:
1653 header_offset = zinfo.header_offset
1654
1655 extra_data = zinfo.extra
1656 min_version = 0
1657 if extra:
1658 # Append a ZIP64 field to the extra's
1659 extra_data = struct.pack(
1660 '<HH' + 'Q'*len(extra),
1661 1, 8*len(extra), *extra) + extra_data
1662
1663 min_version = ZIP64_VERSION
1664
1665 if zinfo.compress_type == ZIP_BZIP2:
1666 min_version = max(BZIP2_VERSION, min_version)
1667 elif zinfo.compress_type == ZIP_LZMA:
1668 min_version = max(LZMA_VERSION, min_version)
1669
1670 extract_version = max(min_version, zinfo.extract_version)
1671 create_version = max(min_version, zinfo.create_version)
1672 try:
1673 filename, flag_bits = zinfo._encodeFilenameFlags()
1674 centdir = struct.pack(structCentralDir,
1675 stringCentralDir, create_version,
1676 zinfo.create_system, extract_version, zinfo.reserved,
1677 flag_bits, zinfo.compress_type, dostime, dosdate,
1678 zinfo.CRC, compress_size, file_size,
1679 len(filename), len(extra_data), len(zinfo.comment),
1680 0, zinfo.internal_attr, zinfo.external_attr,
1681 header_offset)
1682 except DeprecationWarning:
1683 print((structCentralDir, stringCentralDir, create_version,
1684 zinfo.create_system, extract_version, zinfo.reserved,
1685 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1686 zinfo.CRC, compress_size, file_size,
1687 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1688 0, zinfo.internal_attr, zinfo.external_attr,
1689 header_offset), file=sys.stderr)
1690 raise
1691 self.fp.write(centdir)
1692 self.fp.write(filename)
1693 self.fp.write(extra_data)
1694 self.fp.write(zinfo.comment)
1695
1696 pos2 = self.fp.tell()
1697 # Write end-of-zip-archive record
1698 centDirCount = len(self.filelist)
1699 centDirSize = pos2 - self.start_dir
1700 centDirOffset = self.start_dir
1701 requires_zip64 = None
1702 if centDirCount > ZIP_FILECOUNT_LIMIT:
1703 requires_zip64 = "Files count"
1704 elif centDirOffset > ZIP64_LIMIT:
1705 requires_zip64 = "Central directory offset"
1706 elif centDirSize > ZIP64_LIMIT:
1707 requires_zip64 = "Central directory size"
1708 if requires_zip64:
1709 # Need to write the ZIP64 end-of-archive records
1710 if not self._allowZip64:
1711 raise LargeZipFile(requires_zip64 +
1712 " would require ZIP64 extensions")
1713 zip64endrec = struct.pack(
1714 structEndArchive64, stringEndArchive64,
1715 44, 45, 45, 0, 0, centDirCount, centDirCount,
1716 centDirSize, centDirOffset)
1717 self.fp.write(zip64endrec)
1718
1719 zip64locrec = struct.pack(
1720 structEndArchive64Locator,
1721 stringEndArchive64Locator, 0, pos2, 1)
1722 self.fp.write(zip64locrec)
1723 centDirCount = min(centDirCount, 0xFFFF)
1724 centDirSize = min(centDirSize, 0xFFFFFFFF)
1725 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1726
1727 endrec = struct.pack(structEndArchive, stringEndArchive,
1728 0, 0, centDirCount, centDirCount,
1729 centDirSize, centDirOffset, len(self._comment))
1730 self.fp.write(endrec)
1731 self.fp.write(self._comment)
1732 self.fp.flush()
1733
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +02001734 def _fpclose(self, fp):
1735 assert self._fileRefCnt > 0
1736 self._fileRefCnt -= 1
1737 if not self._fileRefCnt and not self._filePassed:
1738 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001739
1740
class PyZipFile(ZipFile):
    """ZipFile subclass for archiving Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember `optimize'.

        `optimize' is used by _get_codename(): -1 selects the legacy
        lookup of an existing pyc file, 0, 1 and 2 select the
        corresponding optimization level.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory (it contains __init__.py),
        the directory and its package subdirectories are searched
        recursively for *.py files.  If pathname is a plain directory,
        only the *.py files directly inside it are added.  Otherwise
        pathname must be a *.py file, which is added on its own.
        Modules are compiled through _get_codename() if necessary.

        If filterfunc is given, it is called with pathname and with the
        path of every *.py file found; when it returns a false value the
        file or directory is skipped.

        Raises RuntimeError for a file whose name does not end in ".py".
        """
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s "%s" skipped by filterfunc' % (label, pathname))
            return

        def add_module(path, prefix):
            # Store one *.py file under `prefix' unless it is filtered out.
            if filterfunc and not filterfunc(path):
                if self.debug:
                    print('file "%s" skipped by filterfunc' % path)
                return
            fname, arcname = self._get_codename(path[0:-3], prefix)
            if self.debug:
                print("Adding", arcname)
            self.write(fname, arcname)

        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] == ".py":
                    add_module(os.path.join(pathname, filename), basename)
            return

        # This is a package directory, add it
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)
        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in entries:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename,
                                 filterfunc=filterfunc)  # Recursive call
            elif os.path.splitext(filename)[1] == ".py":
                add_module(path, basename)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module path without extension, return the file to read
        and the name to store it under, compiling the source if
        necessary.  For example, /python/lib/string with an empty
        basename normally yields a pyc file path and "string.pyc"; when
        compilation fails the .py file itself is returned as
        (/python/lib/string.py, "string.py").  A non-empty basename is
        prepended to the archive name with a "/".

        Raises ValueError when self._optimize is not -1, 0, 1 or 2.
        """
        def _compile(file, optimize=-1):
            # Compile `file'; report a compile error and return False.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        def _is_fresh(candidate):
            # True if candidate exists and is not older than the source.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
        if self._optimize == -1:
            # legacy mode: use whatever file is present.  The legacy pyc
            # next to the source is tried first, then the __pycache__
            # files; all are stored under the legacy pyc name.
            for candidate in (file_pyc, pycache_opt0,
                              pycache_opt1, pycache_opt2):
                if _is_fresh(candidate):
                    fname = candidate
                    arcname = file_pyc
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    if sys.flags.optimize == 0:
                        fname = pycache_opt0
                    elif sys.flags.optimize == 1:
                        fname = pycache_opt1
                    else:
                        fname = pycache_opt2
                    arcname = file_pyc
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            arcname = file_pyc
            if self._optimize == 0:
                fname = pycache_opt0
            elif self._optimize == 1:
                fname = pycache_opt1
            elif self._optimize == 2:
                fname = pycache_opt2
            else:
                msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                raise ValueError(msg)
            if not _is_fresh(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001912
1913
def main(args=None):
    """Command-line interface: list, test, extract or create a zipfile.

    `args' defaults to sys.argv[1:].  The usage text is printed and
    sys.exit(1) is called when the option or the number of arguments is
    not valid.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    def usage_error():
        print(USAGE)
        sys.exit(1)

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_error()

    option = args[0]
    if option == '-l':
        if len(args) != 2:
            usage_error()
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif option == '-t':
        if len(args) != 2:
            usage_error()
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif option == '-e':
        if len(args) != 3:
            usage_error()

        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif option == '-c':
        if len(args) < 3:
            usage_error()

        def add_to_zip(zf, path, zippath):
            # Files are deflated; directories are added recursively.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                if zippath:
                    zf.write(path, zippath)
                for nm in os.listdir(path):
                    add_to_zip(zf,
                               os.path.join(path, nm),
                               os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w') as zf:
            for path in args[2:]:
                # A trailing separator leaves an empty basename; use the
                # name of the directory itself in that case.
                zippath = (os.path.basename(path) or
                           os.path.basename(os.path.dirname(path)))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                add_to_zip(zf, path, zippath)


if __name__ == "__main__":
    main()