# blob: 9164f8ab086a1656a24eca919f9a1684d7b30e59
"""
Read and write ZIP files.

XXX references to utf-8 need further investigation.
"""
import io
import os
import re
import importlib.util
import sys
import time
import stat
import shutil
import struct
import binascii

# Fall back to the single-threaded stand-in when the real threading module
# cannot be imported.
try:
    import threading
except ImportError:
    import dummy_threading as threading

# The compression back ends are optional.  Each name is bound to None when
# its module is missing so that later code can test for availability.
try:
    import zlib  # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    # binascii provides a CRC-32 implementation when zlib is unavailable.
    crc32 = binascii.crc32

try:
    import bz2  # We may need its compression method
except ImportError:
    bz2 = None

try:
    import lzma  # We may need its compression method
except ImportError:
    lzma = None
38
# Names exported by "from zipfile import *".
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000042
class BadZipFile(Exception):
    """Raised by this module when an archive is malformed or unsupported."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +000045
46
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """
52
# Pre-3.2 compatibility names: both are aliases of BadZipFile.
error = BadZipfile = BadZipFile
54
Guido van Rossum32abe6f2000-03-31 17:30:02 +000055
# Sizes above ZIP64_LIMIT (2**31 - 1) cause ZipInfo.FileHeader() to use the
# ZIP64 extension.
ZIP64_LIMIT = (1 << 31) - 1
# Both of the limits below equal 0xFFFF, the largest unsigned 16-bit value.
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

# "Version needed to extract" values used by ZipInfo.FileHeader().
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020073
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# NOTE: this format is a bytes object while the other struct formats in this
# module are str; the struct module accepts either.
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9
98
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
125
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# indexes of entries in the local file header structure
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
144
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
166
def _check_zipfile(fp):
    """Return True if an "end of central directory" record is found in *fp*.

    *fp* is a file object passed straight to _EndRecData().  An OSError
    raised while probing the file is treated as "not a ZIP file" and
    yields False.
    """
    try:
        # _EndRecData() returns a non-empty list when the record (and so
        # the magic number) was found, and None otherwise.
        return bool(_EndRecData(fp))
    except OSError:
        return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000174
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too: anything
    with a "read" attribute is probed directly, anything else is opened
    in binary mode first.

    Returns True or False.  An OSError (for example a missing or unreadable
    file) is deliberately swallowed and reported as False.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except OSError:
        pass
    return result
190
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin is a seekable binary file object.  offset is the position of the
    regular "end of central directory" record relative to the end of the
    file (so it is negative).  endrec is the list built by _EndRecData().

    endrec is returned unchanged when no valid ZIP64 locator/record pair
    precedes the regular record; otherwise it is updated in place and
    returned.  BadZipFile is raised for archives spanning multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, _reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    # Fields with a leading underscore are unpacked but not used here.
    (sig, _sz, _create_version, _read_version, disk_num, disk_dir,
     dircount, dircount2, dirsize, diroffset) = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
232
233
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of ten items: the eight fields of the ZIP "End of
    central dir" record, then the archive comment (bytes), then the file
    seek offset of this record.  When a ZIP64 record is present the list
    is updated from it by _EndRecData64().

    None is returned when no valid record can be located."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start + sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Zip file is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
        comment = data[start + sizeEndCentDir:
                       start + sizeEndCentDir + commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000293
Fred Drake484d7352000-10-02 21:14:52 +0000294
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename is the member name; it is cut at the first null byte and
        the platform separator is replaced by "/".  date_time is a
        (year, month, day, hour, min, sec) sequence.

        Raises ValueError when the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0      # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3      # System which created ZIP archive
        self.create_version = DEFAULT_VERSION   # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION  # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def __repr__(self):
        """Return a compact description listing only non-default details."""
        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
        if self.compress_type != ZIP_STORED:
            result.append(' compress_type=%s' %
                          compressor_names.get(self.compress_type,
                                               self.compress_type))
        # The upper 16 bits of external_attr are shown as a file mode, the
        # lower 16 bits as a raw hexadecimal value.
        hi = self.external_attr >> 16
        lo = self.external_attr & 0xFFFF
        if hi:
            result.append(' filemode=%r' % stat.filemode(hi))
        if lo:
            result.append(' external_attr=%#x' % lo)
        isdir = self.is_dir()
        if not isdir or self.file_size:
            result.append(' file_size=%r' % self.file_size)
        if ((not isdir or self.compress_size) and
            (self.compress_type != ZIP_STORED or
             self.file_size != self.compress_size)):
            result.append(' compress_size=%r' % self.compress_size)
        result.append('>')
        return ''.join(result)

    def FileHeader(self, zip64=None):
        """Return the per-file (local) header as bytes.

        zip64 selects whether a ZIP64 extra record is appended: True forces
        it, False forbids it, and None (the default) enables it only when a
        size exceeds ZIP64_LIMIT.

        As a side effect extract_version and create_version are raised to
        the minimum version required by the features in use.

        Raises LargeZipFile when a size exceeds ZIP64_LIMIT and zip64 is
        false.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4,
                                        file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        The name is encoded as ASCII when possible; otherwise it is encoded
        as UTF-8 and bit 0x800 is added to the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 record (type 1) found in self.extra, if any.

        file_size, compress_size and header_offset are replaced, in that
        order, by the 64-bit values of the record whenever they currently
        hold the 0xffffffff placeholder.

        Raises BadZipFile when the ZIP64 record has an unexpected size, is
        truncated, or holds fewer values than there are placeholders.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    nfields = 3
                elif ln in (0, 8, 16):
                    nfields = ln // 8
                else:
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))

                payload = extra[4:4 + 8 * nfields]
                if len(payload) != 8 * nfields:
                    # The record claims more data than the field contains;
                    # report it as a bad archive instead of leaking
                    # struct.error to the caller.
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
                counts = unpack('<%dQ' % nfields, payload)

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                try:
                    if self.file_size in (0xffffffffffffffff, 0xffffffff):
                        self.file_size = counts[idx]
                        idx += 1

                    if self.compress_size == 0xFFFFFFFF:
                        self.compress_size = counts[idx]
                        idx += 1

                    if self.header_offset == 0xffffffff:
                        self.header_offset = counts[idx]
                        idx += 1
                except IndexError:
                    # More placeholders than values stored in the record.
                    raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln)) from None

            extra = extra[ln+4:]

    @classmethod
    def from_file(cls, filename, arcname=None):
        """Construct an appropriate ZipInfo for a file on the filesystem.

        filename should be the path to a file or directory on the filesystem.

        arcname is the name which it will have within the archive (by default,
        this will be the same as filename, but without a drive letter and with
        leading path separators removed).
        """
        if isinstance(filename, os.PathLike):
            filename = os.fspath(filename)
        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        # Guard against the name becoming empty (e.g. for the root
        # directory), which would otherwise raise IndexError.
        while arcname and arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = cls(arcname, date_time)
        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
        if isdir:
            zinfo.file_size = 0
            zinfo.external_attr |= 0x10  # MS-DOS directory flag
        else:
            zinfo.file_size = st.st_size

        return zinfo

    def is_dir(self):
        """Return True if this archive member is a directory."""
        # endswith() also copes with an empty filename, for which indexing
        # the last character would raise IndexError.
        return self.filename.endswith('/')
509
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000510
Thomas Wouterscf297e42007-02-23 15:07:44 +0000511class _ZipDecrypter:
512 """Class to handle decryption of files stored within a ZIP archive.
513
514 ZIP supports a password-based form of encryption. Even though known
515 plaintext attacks have been found against it, it is still useful
Christian Heimesfdab48e2008-01-20 09:06:41 +0000516 to be able to get data out of such a file.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000517
518 Usage:
519 zd = _ZipDecrypter(mypwd)
520 plain_char = zd(cypher_char)
521 plain_text = map(zd, cypher_text)
522 """
523
524 def _GenerateCRCTable():
525 """Generate a CRC-32 table.
526
527 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
528 internal keys. We noticed that a direct implementation is faster than
529 relying on binascii.crc32().
530 """
531 poly = 0xedb88320
532 table = [0] * 256
533 for i in range(256):
534 crc = i
535 for j in range(8):
536 if crc & 1:
537 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
538 else:
539 crc = ((crc >> 1) & 0x7FFFFFFF)
540 table[i] = crc
541 return table
Daniel Holth9dee3042014-01-02 23:17:21 -0500542 crctable = None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000543
544 def _crc32(self, ch, crc):
545 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000546 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000547
548 def __init__(self, pwd):
Daniel Holth9dee3042014-01-02 23:17:21 -0500549 if _ZipDecrypter.crctable is None:
550 _ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable()
Thomas Wouterscf297e42007-02-23 15:07:44 +0000551 self.key0 = 305419896
552 self.key1 = 591751049
553 self.key2 = 878082192
554 for p in pwd:
555 self._UpdateKeys(p)
556
557 def _UpdateKeys(self, c):
558 self.key0 = self._crc32(c, self.key0)
559 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
560 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000561 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000562
563 def __call__(self, c):
564 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000565 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000566 k = self.key2 | 2
567 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000568 self._UpdateKeys(c)
569 return c
570
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200571
class LZMACompressor:
    """Compressor producing a raw LZMA1 stream preceded by a small header.

    The header is written in front of the first data returned by
    compress() or flush().  It consists of the bytes 9 and 4 (presumably
    an LZMA SDK version as described by the ZIP format document -- TODO
    confirm), the 16-bit little-endian length of the encoded filter
    properties, and the properties themselves.
    """

    def __init__(self):
        # The underlying compressor is created lazily by _init().
        self._comp = None

    def _init(self):
        """Create the underlying raw compressor and return the header."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
            lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        ])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*, returning the bytes produced so far."""
        if self._comp is None:
            # Bind the header first: _init() is what creates self._comp.
            header = self._init()
            return header + self._comp.compress(data)
        return self._comp.compress(data)

    def flush(self):
        """Finish the stream and return the remaining compressed bytes."""
        if self._comp is None:
            header = self._init()
            return header + self._comp.flush()
        return self._comp.flush()
593
594
class LZMADecompressor:
    """Decompressor for a raw LZMA1 stream preceded by a small header.

    The header is four bytes, of which bytes 2-3 hold the little-endian
    length of the filter properties, followed by the properties
    themselves.  Input is buffered until the whole header has arrived.
    """

    def __init__(self):
        self._decomp = None         # created once the header is complete
        self._unconsumed = b''      # header bytes received so far
        self.eof = False            # mirrors the underlying decompressor

    def decompress(self, data):
        """Return the bytes decompressed from *data*.

        b'' is returned while the header is still incomplete.
        """
        if self._decomp is None:
            self._unconsumed += data
            if len(self._unconsumed) <= 4:
                return b''
            psize, = struct.unpack('<H', self._unconsumed[2:4])
            if len(self._unconsumed) <= 4 + psize:
                return b''

            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
                lzma._decode_filter_properties(lzma.FILTER_LZMA1,
                                               self._unconsumed[4:4 + psize])
            ])
            # Everything after the header is compressed payload.
            data = self._unconsumed[4 + psize:]
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
621
622
# Human-readable names of ZIP compression method numbers, used when
# describing an entry or reporting an unsupported method.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
642
def _check_compression(compression):
    """Validate that *compression* is a usable compression method.

    Raises RuntimeError when the method is known but the module providing
    it could not be imported, and NotImplementedError for any other
    method number.  Returns None on success.
    """
    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
    else:
        raise NotImplementedError("That compression method is not supported")
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200660
661
def _get_compressor(compress_type):
    """Return a new compressor object for *compress_type*.

    None is returned for any method other than deflate, bzip2 and lzma
    (which includes ZIP_STORED).
    """
    if compress_type == ZIP_DEFLATED:
        # A negative window size makes zlib emit a raw deflate stream.
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    elif compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    elif compress_type == ZIP_LZMA:
        return LZMACompressor()
    else:
        return None
672
673
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    None is returned for ZIP_STORED.  NotImplementedError is raised for a
    method this module cannot decompress; the message includes the
    method's name when it is listed in compressor_names.
    """
    if compress_type == ZIP_STORED:
        return None
    elif compress_type == ZIP_DEFLATED:
        # A negative window size makes zlib expect a raw deflate stream.
        return zlib.decompressobj(-15)
    elif compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    elif compress_type == ZIP_LZMA:
        return LZMADecompressor()
    else:
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
        else:
            raise NotImplementedError("compression type %d" % (compress_type,))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200689
690
class _SharedFile:
    """Reader with its own position on top of a shared file object.

    file:    the underlying file object.
    pos:     position in *file* at which the next read starts.
    close:   callable invoked with the file object when close() is called.
    lock:    context manager held for the duration of every read.
    writing: callable returning true while a writing handle is open.
    """

    def __init__(self, file, pos, close, lock, writing):
        self._file = file
        self._pos = pos
        self._close = close
        self._lock = lock
        self._writing = writing

    def read(self, n=-1):
        """Read up to n bytes starting at this object's own position.

        Raises ValueError if the writing() callable reports an open
        writing handle.
        """
        with self._lock:
            if self._writing():
                raise ValueError("Can't read from the ZIP file while there "
                        "is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            stream = self._file
            # The shared file may have been moved by someone else, so
            # reposition it before each read and remember where we stopped.
            stream.seek(self._pos)
            chunk = stream.read(n)
            self._pos = stream.tell()
            return chunk

    def close(self):
        """Hand the file object to the close callback, at most once."""
        if self._file is None:
            return
        fileobj, self._file = self._file, None
        self._close(fileobj)
715
# Provide the tell method for unseekable stream
class _Tellable:
    """Wrapper that tracks how much has been written to *fp*.

    tell() reports the running total of the values returned by fp.write(),
    starting from zero, so it works even when fp has no tell() of its own.
    """

    def __init__(self, fp):
        self.fp = fp
        self.offset = 0

    def write(self, data):
        """Write data to the wrapped stream and advance the offset."""
        written = self.fp.write(data)
        self.offset = self.offset + written
        return written

    def tell(self):
        """Return the number of bytes written through this wrapper."""
        return self.offset

    def flush(self):
        """Flush the wrapped stream."""
        self.fp.flush()

    def close(self):
        """Close the wrapped stream."""
        self.fp.close()
735
Serhiy Storchaka1ad088f2014-12-03 09:11:57 +0200736
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # The parentheses matter: "-" binds tighter than "<<", so the unbracketed
    # form "1 << 31 - 1" evaluates to 1 << 30 rather than 2**31 - 1.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Set up reading of the member described by *zipinfo*.

        fileobj:       object whose read() supplies the member's raw bytes.
        mode:          stored as the .mode attribute.
        zipinfo:       supplies compress_type, compress_size, file_size,
                       filename and (optionally) CRC.
        decrypter:     optional callable applied to every raw byte.
        close_fileobj: if true, close() also closes *fileobj*.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size  # raw bytes still to read
        self._left = zipinfo.file_size               # output bytes still due

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0  # index of the first unread byte in _readbuffer

        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            # No reference value: _update_crc() becomes a no-op.
            self._expected_crc = None

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # Push the bytes just read back in front of the unread buffer.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: this object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            # Request is satisfied entirely from the buffer.
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        # Drain the buffer, then keep reading until n more bytes arrive.
        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                # Got more than requested: keep the surplus buffered.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        """Fold *newdata* into the running CRC.

        Raises BadZipFile once EOF has been reached and the running CRC
        differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n=-1):
        """Read up to n bytes with at most one read() system call.

        If n is omitted, None, or negative, return the buffered data plus
        the first non-empty chunk obtained from the stream.
        """

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            # Loop only to skip empty chunks; stop at the first real data.
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        """Return one decrypted, decompressed chunk (b'' at EOF or n <= 0)."""
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        # Never hand out more than the declared uncompressed size.
        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        """Read and decrypt raw bytes from the underlying file object.

        Reads at least MIN_READ_SIZE bytes when that many remain, and never
        more than the member's remaining compressed size.  Raises EOFError
        if the file object returns no data while bytes are still expected.
        """
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = bytes(map(self._decrypter, data))
        return data

    def close(self):
        """Close this object, and the file object too if so requested."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000964
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000965
class _ZipWriteFile(io.BufferedIOBase):
    """Writable file object for one member being added to an archive.

    Data passed to write() is CRC'd, optionally compressed, and written to
    the owning ZipFile's file object.  close() finalises the member: it
    records the sizes and CRC on the ZipInfo, writes them to the archive,
    registers the member with the ZipFile and clears its _writing flag.
    """

    def __init__(self, zf, zinfo, zip64):
        """zf is the owning ZipFile, zinfo the member's ZipInfo and zip64
        tells whether ZIP64-sized fields are used for this member."""
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type)
        self._file_size = 0
        self._compress_size = 0
        self._crc = 0

    @property
    def _fileobj(self):
        # Looked up on every access rather than cached at construction.
        return self._zipfile.fp

    def writable(self):
        """Return True: this object supports writing."""
        return True

    def write(self, data):
        """Write *data* to the member and return the number of bytes used.

        Raises ValueError if this object has already been closed.
        """
        if self.closed:
            raise ValueError('I/O operation on closed file.')

        # Accept any object supporting the buffer protocol.  len() counts
        # items, not bytes, for buffers whose items are wider than one byte
        # (e.g. array('H')), so take the byte count from a memoryview.
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes
        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        """Finish the member and release the archive's writing state.

        Raises RuntimeError if ZIP64 was not enabled for this member but a
        size exceeded ZIP64_LIMIT.  Whatever happens, the ZipFile's _writing
        flag is cleared so that the archive is not left permanently locked
        against reads by a failed close.
        """
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            # Write updated header info
            if self._zinfo.flag_bits & 0x08:
                # Write CRC and file sizes after the file data
                fmt = '<LQQ' if self._zip64 else '<LLL'
                self._fileobj.write(struct.pack(fmt, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                if not self._zip64:
                    if self._file_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'File size unexpectedly exceeded ZIP64 limit')
                    if self._compress_size > ZIP64_LIMIT:
                        raise RuntimeError(
                            'Compressed size unexpectedly exceeded ZIP64 limit')
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            self._zipfile._writing = False
1039
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001040class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +00001041 """ Class with methods to open, read, write, close, list zip files.
1042
Serhiy Storchaka235c5e02013-11-23 15:55:38 +02001043 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +00001044
Fred Drake3d9091e2001-03-26 15:49:24 +00001045 file: Either the path to the file, or a file-like object.
1046 If it is a path, the file will be opened and closed by ZipFile.
Serhiy Storchaka764fc9b2015-03-25 10:09:41 +02001047 mode: The mode can be either read 'r', write 'w', exclusive create 'x',
1048 or append 'a'.
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +02001049 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
1050 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001051 allowZip64: if True ZipFile will create files with ZIP64 extensions when
1052 needed, otherwise it will raise an exception when this would
1053 be necessary.
1054
Fred Drake3d9091e2001-03-26 15:49:24 +00001055 """
Fred Drake484d7352000-10-02 21:14:52 +00001056
Fred Drake90eac282001-02-28 05:29:34 +00001057 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -08001058 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +00001059
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
    """Open the ZIP file in the given mode.

    mode is one of read 'r', write 'w', exclusive create 'x' or append 'a';
    anything else raises ValueError.  file may be a str or os.PathLike path,
    which is opened here, or an already open file-like object.
    """
    if mode not in ('r', 'w', 'x', 'a'):
        raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                  # Level of printing: 0 through 3
    self.NameToInfo = {}            # Find file info given name
    self.filelist = []              # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = mode
    self.pwd = None
    self._comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, os.PathLike):
        file = os.fspath(file)
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        # Maps a ZipFile mode to the first file mode to try, and a file
        # mode that failed to the next one to fall back to.
        next_mode = {
            'r': 'rb',
            'w': 'w+b',
            'x': 'x+b',
            'a': 'r+b',
            'r+b': 'w+b',
            'w+b': 'wb',
            'x+b': 'xb',
        }
        open_mode = next_mode[mode]
        while True:
            try:
                self.fp = io.open(file, open_mode)
                break
            except OSError:
                if open_mode not in next_mode:
                    # No fallback left for this mode.
                    raise
                open_mode = next_mode[open_mode]
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    self._fileRefCnt = 1
    self._lock = threading.RLock()
    self._seekable = True
    self._writing = False

    try:
        if mode == 'r':
            self._RealGetContents()
        elif mode in ('w', 'x'):
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
            try:
                self.start_dir = self.fp.tell()
            except (AttributeError, OSError):
                # No usable tell(): count written bytes ourselves.
                self.fp = _Tellable(self.fp)
                self.start_dir = 0
                self._seekable = False
            else:
                # Some file-like objects can provide tell() but not seek()
                try:
                    self.fp.seek(self.start_dir)
                except (AttributeError, OSError):
                    self._seekable = False
        elif mode == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                self.start_dir = self.fp.tell()
        else:
            raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
    except:
        # Deliberately catches everything: release the file, then re-raise.
        failed_fp = self.fp
        self.fp = None
        self._fpclose(failed_fp)
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001146
def __enter__(self):
    """Enter a "with" block; the archive object itself is the target."""
    return self
1149
def __exit__(self, type, value, traceback):
    """Leave a "with" block by calling close(); returns None."""
    self.close()
1152
def __repr__(self):
    """Return '<module.Class ...>' describing the archive's state.

    Shows ' [closed]' when fp is None; otherwise the passed file object or
    the filename (if any), followed by the mode.
    """
    cls = self.__class__
    pieces = ['<%s.%s' % (cls.__module__, cls.__qualname__)]
    if self.fp is None:
        pieces.append(' [closed]')
    else:
        if self._filePassed:
            pieces.append(' file=%r' % self.fp)
        elif self.filename is not None:
            pieces.append(' filename=%r' % self.filename)
        pieces.append(' mode=%r' % self.mode)
    pieces.append('>')
    return ''.join(pieces)
1166
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Fills self.filelist and self.NameToInfo with one ZipInfo per central
    directory entry, and sets self._comment and self.start_dir.

    Raises BadZipFile when no end-of-archive record is found or the central
    directory is truncated or has a bad signature, and NotImplementedError
    when an entry needs a newer extract version than MAX_EXTRACT_VERSION.
    """
    archive = self.fp
    try:
        endrec = _EndRecData(archive)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    archive.seek(self.start_dir, 0)
    # Parse the directory from an in-memory copy.
    directory = io.BytesIO(archive.read(size_cd))
    total = 0
    while total < size_cd:
        raw_entry = directory.read(sizeCentralDir)
        if len(raw_entry) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, raw_entry)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        name_len = centdir[_CD_FILENAME_LENGTH]
        extra_len = centdir[_CD_EXTRA_FIELD_LENGTH]
        comment_len = centdir[_CD_COMMENT_LENGTH]

        raw_name = directory.read(name_len)
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = raw_name.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = raw_name.decode('cp437')
        # Create ZipInfo instance to store file information
        info = ZipInfo(filename)
        info.extra = directory.read(extra_len)
        info.comment = directory.read(comment_len)
        info.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (info.create_version, info.create_system, info.extract_version,
         info.reserved, info.flag_bits, info.compress_type, t, d,
         info.CRC, info.compress_size, info.file_size) = centdir[1:12]
        if info.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (info.extract_version / 10))
        info.volume, info.internal_attr, info.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        info._raw_time = t
        info.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                          t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        info._decodeExtra()
        info.header_offset = info.header_offset + concat
        self.filelist.append(info)
        self.NameToInfo[info.filename] = info

        # update total bytes read from central directory
        total = total + sizeCentralDir + name_len + extra_len + comment_len

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001243
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001244
def namelist(self):
    """Return a new list with the filename of every entry in filelist."""
    names = []
    for info in self.filelist:
        names.append(info.filename)
    return names
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001248
def infolist(self):
    """Return the list of ZipInfo instances for the archive's files.

    This is the archive's own filelist object, not a copy.
    """
    return self.filelist
1253
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header line is printed, then one line per entry giving its name,
    modification date/time and size.  *file* is passed through to print().
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for zinfo in self.filelist:
        date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001262
1263 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001264 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001265 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001266 for zinfo in self.filelist:
1267 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001268 # Read by chunks, to avoid an OverflowError or a
1269 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001270 with self.open(zinfo.filename, "r") as f:
1271 while f.read(chunk_size): # Check CRC-32
1272 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001273 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001274 return zinfo.filename
1275
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError if NameToInfo has no entry for the name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001284
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A false value (None, b'') clears the password.  Raises TypeError if a
    true value that is not bytes is given.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001293
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Only bytes are accepted; anything else is a caller error.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
    # check for valid comment length
    too_long = len(comment) > ZIP_MAX_COMMENT
    if too_long:
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT, stacklevel=2)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    # Mark the archive as modified because its comment changed.
    self._didModify = True
1311
def read(self, name, pwd=None):
    """Return the complete contents of the member *name*.

    The member is opened with self.open(name, "r", pwd), read to the end
    and closed again.
    """
    with self.open(name, "r", pwd) as member:
        return member.read()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001316
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Return file-like object for 'name'.

    name is a string for the file name within the ZIP file, or a ZipInfo
    object.

    mode should be 'r' to read a file already in the ZIP file, or 'w' to
    write to a file newly added to the archive.

    pwd is the password to decrypt files (only used for reading).

    When writing, if the file size is not known in advance but may exceed
    2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
    files.  If the size is known in advance, it is best to pass a ZipInfo
    instance for name, with zinfo.file_size set.

    Raises ValueError for a bad mode, for pwd combined with mode 'w', for a
    closed archive, or when reading while a write handle is open; TypeError
    when pwd is not bytes; BadZipFile when the local file header (or the
    encryption header) is truncated or inconsistent with the central
    directory; NotImplementedError for patched data or strong encryption;
    RuntimeError when a password is missing or wrong.
    """
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    if pwd and (mode == "w"):
        raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    # Make sure we have an info object
    if isinstance(name, ZipInfo):
        # 'name' is already an info object
        zinfo = name
    elif mode == 'w':
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
    else:
        # Get info object for name
        zinfo = self.getinfo(name)

    if mode == 'w':
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                         "is an open writing handle on it. "
                         "Close the writing handle before trying to read.")

    # Open for reading: every reader holds one reference on the shared
    # underlying file; it is released through self._fpclose.
    self._fileRefCnt += 1
    zef_file = _SharedFile(self.fp, zinfo.header_offset,
                           self._fpclose, self._lock, lambda: self._writing)
    try:
        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, fheader)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            # The extra field is read only to advance past it.
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            if len(header) != 12:
                # A short read means the archive ends inside the encryption
                # header; report it as a corrupt archive instead of letting
                # h[11] below fail with an IndexError.
                raise BadZipFile("Truncated encryption header")
            h = list(map(zd, header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file time from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file %r" % name)

        return ZipExtFile(zef_file, mode, zinfo, zd, True)
    except:
        # Deliberately catches everything: the shared handle must be released
        # on any failure, and the exception is always re-raised.
        zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001429
def _open_to_write(self, zinfo, force_zip64=False):
    """Write the local file header for 'zinfo' and return a write handle.

    Raises ValueError when force_zip64 is requested on an archive opened
    without ZIP64 support, or when another write handle is still open.
    """
    if force_zip64 and not self._allowZip64:
        raise ValueError(
            "force_zip64 is True, but allowZip64 was False when opening "
            "the ZIP file."
        )
    if self._writing:
        raise ValueError("Can't write to the ZIP file while there is "
                         "another write handle open on it. "
                         "Close the first handle before opening another.")

    # Placeholders only: sizes and CRC are overwritten with correct data
    # after processing the file.  A caller-supplied file_size is kept.
    if not hasattr(zinfo, 'file_size'):
        zinfo.file_size = 0
    zinfo.compress_size = 0
    zinfo.CRC = 0

    # Rebuild the general purpose flags from scratch.
    flags = 0x00
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        flags |= 0x02
    if not self._seekable:
        flags |= 0x08
    zinfo.flag_bits = flags

    if not zinfo.external_attr:
        zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------

    # Compressed size can be larger than uncompressed size, hence the 5%
    # margin.  Short-circuits on _allowZip64 exactly like an 'and' chain.
    zip64 = ((force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
             if self._allowZip64 else self._allowZip64)

    # The new member starts where the central directory currently begins.
    if self._seekable:
        self.fp.seek(self.start_dir)
    zinfo.header_offset = self.fp.tell()

    self._writecheck(zinfo)
    self._didModify = True

    local_header = zinfo.FileHeader(zip64)
    self.fp.write(local_header)

    self._writing = True
    return _ZipWriteFile(self, zinfo, zip64)
1472
def extract(self, member, path=None, pwd=None):
    """Extract one member from the archive and return the created path.

    `member' may be a filename or a ZipInfo object; it is extracted under
    its full name.  `path' selects the destination directory and defaults
    to the current working directory.  `pwd' is handed on to
    _extract_member().
    """
    destination = os.getcwd() if path is None else os.fspath(path)
    return self._extract_member(member, destination, pwd)
1485
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members from the archive.

    `members' defaults to everything returned by namelist() and should be
    a subset of it.  `path' is the destination directory and defaults to
    the current working directory.  `pwd' is handed on to
    _extract_member() for every member.
    """
    if members is None:
        members = self.namelist()

    destination = os.getcwd() if path is None else os.fspath(path)

    for entry in members:
        self._extract_member(entry, destination, pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001502
@classmethod
def _sanitize_windows_name(cls, arcname, pathsep):
    """Return 'arcname' with illegal characters replaced by '_', trailing
    dots stripped from every component, and empty components dropped."""
    translation = cls._windows_illegal_name_trans_table
    if not translation:
        # Built on first use and cached on the class for later calls.
        forbidden = ':<>|"?*'
        translation = str.maketrans(forbidden, '_' * len(forbidden))
        cls._windows_illegal_name_trans_table = translation
    cleaned = arcname.translate(translation)
    # Strip trailing dots per component ...
    components = [piece.rstrip('.') for piece in cleaned.split(pathsep)]
    # ... and rejoin, leaving out components that ended up empty.
    return pathsep.join(piece for piece in components if piece)
1517
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

    'member' may also be a member name, in which case it is resolved with
    getinfo().  Returns the path that was created (a directory for
    directory members, a regular file otherwise).  'pwd' is passed on to
    open() for reading the member.
    """
    if not isinstance(member, ZipInfo):
        member = self.getinfo(member)

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    invalid_path_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                               if x not in invalid_path_parts)
    if os.path.sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, os.path.sep)

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.  exist_ok=True closes the
    # window between the exists() check and makedirs(): another thread or
    # process extracting into the same tree may create them in between.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs, exist_ok=True)

    if member.is_dir():
        if not os.path.isdir(targetpath):
            try:
                os.mkdir(targetpath)
            except FileExistsError:
                # Lost a race with a concurrent creator; that is fine as
                # long as what now exists really is a directory.
                if not os.path.isdir(targetpath):
                    raise
        return targetpath

    with self.open(member, pwd=pwd) as source, \
         open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1559
def _writecheck(self, zinfo):
    """Validate that 'zinfo' may be written to the archive right now.

    Warns about a duplicate member name, raises ValueError for a wrong
    archive mode or a closed archive, lets _check_compression() vet the
    compression type, and raises LargeZipFile when ZIP64 would be needed
    but is not allowed.
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ('w', 'x', 'a'):
        raise ValueError("write() requires mode 'w', 'x', or 'a'")
    if not self.fp:
        raise ValueError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)

    # With ZIP64 allowed there are no further limits to enforce.
    if self._allowZip64:
        return

    # First limit that is exceeded names the reason for the failure.
    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        reason = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        reason = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        reason = "Zipfile size"
    else:
        return
    raise LargeZipFile(reason +
                       " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001582
def write(self, filename, arcname=None, compress_type=None):
    """Add the file or directory 'filename' to the archive under the
    name 'arcname'.

    For regular files 'compress_type' overrides the archive's default
    compression.  Raises ValueError if the archive is closed or a write
    handle is currently open.
    """
    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists"
        )

    zinfo = ZipInfo.from_file(filename, arcname)

    if not zinfo.is_dir():
        # Regular file: choose the compression, then stream the content
        # through a write handle in 8 KiB chunks.
        zinfo.compress_type = (self.compression if compress_type is None
                               else compress_type)
        with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
            shutil.copyfileobj(src, dest, 1024*8)
        return

    # Directory entry: there is no data, only a header.
    zinfo.compress_size = 0
    zinfo.CRC = 0
    with self._lock:
        if self._seekable:
            self.fp.seek(self.start_dir)
        zinfo.header_offset = self.fp.tell()  # Start of header bytes
        if zinfo.compress_type == ZIP_LZMA:
            # Compressed data includes an end-of-stream (EOS) marker
            zinfo.flag_bits |= 0x02

        self._writecheck(zinfo)
        self._didModify = True

        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        # The central directory will start right after this header.
        self.start_dir = self.fp.tell()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001624
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When given, 'compress_type' overrides the compression type of the
    entry.  Raises ValueError if the archive is closed or a write handle
    is currently open.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        # endswith() rather than filename[-1]: indexing raises IndexError
        # when the resulting filename is empty.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise ValueError(
            "Attempt to write to ZIP archive that was already closed")
    if self._writing:
        raise ValueError(
            "Can't write to ZIP archive while an open writing handle exists."
        )

    if compress_type is not None:
        zinfo.compress_type = compress_type

    zinfo.file_size = len(data)            # Uncompressed size
    # Hold the lock across open/write/close so the whole entry is written
    # as one unit.
    with self._lock:
        with self.open(zinfo, mode='w') as dest:
            dest.write(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001660
def __del__(self):
    """Finalizer: invoke close() in case the user never did."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001664
def close(self):
    """Close the archive; for modes 'w', 'x' and 'a' the ending records
    are written first if the archive was modified.

    Calling close() on an already closed archive does nothing.  Raises
    ValueError while a write handle is still open.
    """
    if self.fp is None:
        return

    if self._writing:
        raise ValueError("Can't close the ZIP file while there is "
                         "an open writing handle on it. "
                         "Close the writing handle before closing the zip.")

    try:
        writable = self.mode in ('w', 'x', 'a')
        if writable and self._didModify:
            # write ending records
            with self._lock:
                if self._seekable:
                    self.fp.seek(self.start_dir)
                self._write_end_record()
    finally:
        # Detach the file object first so the archive counts as closed even
        # if releasing the handle fails.
        handle, self.fp = self.fp, None
        self._fpclose(handle)
1686
def _write_end_record(self):
    """Write the central directory and the end-of-archive records to self.fp.

    One central directory entry is written per member of self.filelist,
    followed (only when a count, offset or size exceeds the classic limits)
    by the ZIP64 end-of-archive record and its locator, then by the regular
    end-of-archive record and the archive comment.  The file is flushed at
    the end.

    Raises LargeZipFile when ZIP64 end records are required but
    self._allowZip64 is false.

    This method does not seek; it presumably relies on the caller having
    positioned self.fp at self.start_dir (the directory size is computed
    relative to it) -- confirm against callers.
    """
    for zinfo in self.filelist:         # write central directory
        # Pack the timestamp into bit fields: years since 1980 / month / day
        # and hour / minute / seconds-halved (assuming date_time is ordered
        # year, month, day, hour, minute, second).
        dt = zinfo.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        # Values too large for the fixed-width fields are collected in
        # 'extra' and replaced by 0xffffffff in the fixed fields.
        extra = []
        if zinfo.file_size > ZIP64_LIMIT \
           or zinfo.compress_size > ZIP64_LIMIT:
            extra.append(zinfo.file_size)
            extra.append(zinfo.compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
        else:
            file_size = zinfo.file_size
            compress_size = zinfo.compress_size

        if zinfo.header_offset > ZIP64_LIMIT:
            extra.append(zinfo.header_offset)
            header_offset = 0xffffffff
        else:
            header_offset = zinfo.header_offset

        extra_data = zinfo.extra
        min_version = 0
        if extra:
            # Put a ZIP64 field (id 1, then the byte length, then one
            # 64-bit value per collected item) in front of the member's
            # own extra data.
            extra_data = struct.pack(
                '<HH' + 'Q'*len(extra),
                1, 8*len(extra), *extra) + extra_data

            min_version = ZIP64_VERSION

        # Some compression methods raise the minimum version as well.
        if zinfo.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif zinfo.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        # Never advertise a version lower than what the member already has.
        extract_version = max(min_version, zinfo.extract_version)
        create_version = max(min_version, zinfo.create_version)
        try:
            filename, flag_bits = zinfo._encodeFilenameFlags()
            centdir = struct.pack(structCentralDir,
                                  stringCentralDir, create_version,
                                  zinfo.create_system, extract_version, zinfo.reserved,
                                  flag_bits, zinfo.compress_type, dostime, dosdate,
                                  zinfo.CRC, compress_size, file_size,
                                  len(filename), len(extra_data), len(zinfo.comment),
                                  0, zinfo.internal_attr, zinfo.external_attr,
                                  header_offset)
        except DeprecationWarning:
            # Diagnostic aid: dump the values that were being packed to
            # stderr, then let the exception propagate unchanged.
            print((structCentralDir, stringCentralDir, create_version,
                   zinfo.create_system, extract_version, zinfo.reserved,
                   zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                   zinfo.CRC, compress_size, file_size,
                   len(zinfo.filename), len(extra_data), len(zinfo.comment),
                   0, zinfo.internal_attr, zinfo.external_attr,
                   header_offset), file=sys.stderr)
            raise
        # Fixed-size entry first, then its variable-length parts in the
        # order their lengths were packed above.
        self.fp.write(centdir)
        self.fp.write(filename)
        self.fp.write(extra_data)
        self.fp.write(zinfo.comment)

    # pos2 is the position right after the last central directory entry.
    pos2 = self.fp.tell()
    # Write end-of-zip-archive record
    centDirCount = len(self.filelist)
    centDirSize = pos2 - self.start_dir
    centDirOffset = self.start_dir
    # Name of the first limit that is exceeded, or None if none is.
    requires_zip64 = None
    if centDirCount > ZIP_FILECOUNT_LIMIT:
        requires_zip64 = "Files count"
    elif centDirOffset > ZIP64_LIMIT:
        requires_zip64 = "Central directory offset"
    elif centDirSize > ZIP64_LIMIT:
        requires_zip64 = "Central directory size"
    if requires_zip64:
        # Need to write the ZIP64 end-of-archive records
        if not self._allowZip64:
            raise LargeZipFile(requires_zip64 +
                               " would require ZIP64 extensions")
        # NOTE(review): 44, 45, 45 are presumably the record size and the
        # creator / extractor versions from the ZIP specification -- confirm
        # against structEndArchive64.
        zip64endrec = struct.pack(
            structEndArchive64, stringEndArchive64,
            44, 45, 45, 0, 0, centDirCount, centDirCount,
            centDirSize, centDirOffset)
        self.fp.write(zip64endrec)

        # The locator carries pos2, the position where the ZIP64 end record
        # above was written.
        zip64locrec = struct.pack(
            structEndArchive64Locator,
            stringEndArchive64Locator, 0, pos2, 1)
        self.fp.write(zip64locrec)
        # Clamp to the largest values the regular end record can hold.
        centDirCount = min(centDirCount, 0xFFFF)
        centDirSize = min(centDirSize, 0xFFFFFFFF)
        centDirOffset = min(centDirOffset, 0xFFFFFFFF)

    endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, centDirCount, centDirCount,
                         centDirSize, centDirOffset, len(self._comment))
    self.fp.write(endrec)
    self.fp.write(self._comment)
    self.fp.flush()
1787
def _fpclose(self, fp):
    """Drop one reference on the shared file object 'fp'; close it once no
    references remain, unless the file object was passed in by the user."""
    assert self._fileRefCnt > 0
    self._fileRefCnt -= 1
    if self._fileRefCnt or self._filePassed:
        # Still in use, or not ours to close.
        return
    fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001793
1794
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile does and remember 'optimize'.

        'optimize' selects which compiled variant writepy() adds: -1 uses
        whatever up-to-date .pyc is present (compiling if there is none),
        while 0, 1 and 2 request that specific optimization level.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.
        If filterfunc is given, it is called with the path of every file or
        directory considered; when it returns a false value, that file or
        directory is skipped.

        Directory listings are processed in sorted order so that the order
        of archive members does not depend on the filesystem.

        Raises RuntimeError if pathname is a file not ending in ".py".
        """
        pathname = os.fspath(pathname)
        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s %r skipped by filterfunc' % (label, pathname))
            return
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                # __init__.py was handled above.  Filtering (instead of
                # list.remove) cannot raise if the listing spells the name
                # differently, e.g. on a case-insensitive filesystem.
                dirlist = [entry for entry in sorted(os.listdir(pathname))
                           if entry != "__init__.py"]
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename,
                                         filterfunc=filterfunc)  # Recursive call
                    elif ext == ".py":
                        self._write_module(path, basename, filterfunc)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in sorted(os.listdir(pathname)):
                    path = os.path.join(pathname, filename)
                    if os.path.splitext(filename)[1] == ".py":
                        self._write_module(path, basename, filterfunc)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _write_module(self, path, basename, filterfunc):
        """Add the single module file 'path' (ending in ".py") under
        'basename', unless filterfunc rejects it."""
        if filterfunc and not filterfunc(path):
            if self.debug:
                print('file %r skipped by filterfunc' % path)
            return
        fname, arcname = self._get_codename(path[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).

        If compilation fails, the .py source itself is returned so that it
        is archived instead.  Raises ValueError when the optimize value
        given to the constructor is not one of -1, 0, 1 or 2.
        """
        def _compile(file, optimize=-1):
            # Returns True on success; a compile error is printed, not raised.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)

        def _is_current(compiled):
            # A compiled file is usable when it exists and is at least as
            # new as the source.
            return (os.path.isfile(compiled) and
                    os.stat(compiled).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present.  The legacy .pyc
            # next to the source is preferred, then the __pycache__ files by
            # increasing optimization level; whichever is used, it is stored
            # under the legacy pyc file name in the archive.
            for candidate in (file_pyc, pycache_opt0,
                              pycache_opt1, pycache_opt2):
                if _is_current(candidate):
                    fname = candidate
                    arcname = file_pyc
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    # The default compile uses the interpreter's own
                    # optimization level; pick the matching cache file.
                    if sys.flags.optimize == 0:
                        fname = pycache_opt0
                    elif sys.flags.optimize == 1:
                        fname = pycache_opt1
                    else:
                        fname = pycache_opt2
                    arcname = file_pyc
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            arcname = file_pyc
            if self._optimize == 0:
                fname = pycache_opt0
            elif self._optimize == 1:
                fname = pycache_opt1
            elif self._optimize == 2:
                fname = pycache_opt2
            else:
                msg = "invalid value for 'optimize': {!r}".format(self._optimize)
                raise ValueError(msg)
            if not _is_current(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001967
1968
def main(args=None):
    """Run the simple command-line interface of this module.

    *args* is the argument list without the program name; when it is None,
    ``sys.argv[1:]`` is used.  Supported invocations:

        -l zipfile.zip          print a listing of the archive
        -t zipfile.zip          test the archive for corrupted members
        -e zipfile.zip target   extract the archive into directory *target*
        -c zipfile.zip src ...  create the archive from the given sources

    An unknown option or a wrong number of arguments prints the usage text
    and terminates through ``sys.exit(1)``.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            """Add *path* to *zf* under the archive name *zippath*.

            Regular files are stored deflated; directories are added
            recursively.  Anything else is silently ignored.
            """
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                # An empty zippath means the directory itself gets no entry
                # and its children are stored at the archive root.
                if zippath:
                    zf.write(path, zippath)
                # os.listdir() returns names in arbitrary order; sort them
                # so the member order of the archive is reproducible.
                for nm in sorted(os.listdir(path)):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w') as zf:
            for path in args[2:]:
                zippath = os.path.basename(path)
                if not zippath:
                    # The path ended with a separator ("dir/"): use the name
                    # of the directory it designates.
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    # Do not create "." or ".." entries in the archive.
                    zippath = ''
                addToZip(zf, path, zippath)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002034
# Script entry point: hand the command-line arguments (without the program
# name) to main() when this file is executed directly.
if __name__ == "__main__":
    main(sys.argv[1:])