blob: f5564dad62a74b472c5969a542c8d2b6d7142e1f [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import imp
10import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
18try:
Tim Peterse1190062001-01-15 03:34:38 +000019 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000020 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000021except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000022 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000023 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020025try:
26 import bz2 # We may need its compression method
27except ImportError:
28 bz2 = None
29
30__all__ = ["BadZipFile", "BadZipfile", "error",
Georg Brandl5c016782012-05-01 09:00:59 +020031 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2",
Georg Brandl4d540882010-10-28 06:42:33 +000032 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000033
class BadZipFile(Exception):
    """Error raised for ZIP data this module cannot accept.

    Within this module it is raised for multi-disk archives and for
    members whose CRC-32 does not match the stored value.
    """
Thomas Wouters0e3f5912006-08-11 14:57:12 +000036
37
class LargeZipFile(Exception):
    """Error for archives that need ZIP64 while ZIP64 is switched off.

    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """
43
# Pre-3.2 compatibility names: both older spellings refer to BadZipFile.
BadZipfile = BadZipFile
error = BadZipFile
45
Guido van Rossum32abe6f2000-03-31 17:30:02 +000046
# Sizes above ZIP64_LIMIT make ZipInfo.FileHeader() fall back to the
# ZIP64 extra field.
ZIP64_LIMIT = 0x7FFFFFFF
ZIP_FILECOUNT_LIMIT = 0x10000
ZIP_MAX_COMMENT = 0xFFFF

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
# Other ZIP compression methods not supported

# "Version needed to extract" values written into headers.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
60
Martin v. Löwisb09b8442008-07-03 14:13:42 +000061# Below are some formats and associated data for reading/writing headers using
62# the struct module. The names and structures of headers/records are those used
63# in the PKWARE description of the ZIP file format:
64# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
65# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000066
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields in the list built from the unpacked record.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE,
 # These last two indices are not part of the structure as defined in the
 # spec, but they are used internally by this module as a convenience
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)
85
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)
112
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields in an unpacked local file header.
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
131
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000136
# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields in an unpacked ZIP64 end-of-central-directory record.
(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
153
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An IOError raised while probing the file object is treated the same
    as "no record found" and yields False.
    """
    try:
        found = _EndRecData(fp)
    except IOError:
        return False
    # file has correct magic number when a record was returned
    return True if found else False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000161
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    *filename* may be a path, or a file or file-like object (anything
    with a ``read`` attribute).  A path is opened in binary mode and
    closed again before returning.  Any IOError, including failure to
    open the path, results in False.
    """
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as fp:
                return _check_zipfile(fp)
        return _check_zipfile(fp=filename)
    except IOError:
        return False
177
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin   -- seekable binary file object.
    offset -- position of the classic "end of central directory" record,
              relative to the end of the file (as passed by _EndRecData).
    endrec -- list produced by _EndRecData; updated in place and returned.

    The list is returned unchanged when the ZIP64 locator or record is
    absent, has the wrong signature, or cannot be read completely.
    Raises BadZipFile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator in front of the
        # classic record, so unpacking would fail.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated ZIP64 record; keep the classic values.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
215
216
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    None is returned when no complete record can be located, including
    files too short to hold one."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check guards against streams that do not fail the seek
    # on a too-short file; unpacking short data would raise struct.error.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # whole record to follow it.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000271
Fred Drake484d7352000-10-02 21:14:52 +0000272
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for *filename* stamped with *date_time*.

        date_time is a (year, month, day, hour, min, sec) tuple.
        Raises ValueError when the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a bytes object.

        The result is the packed local file header followed by the encoded
        file name and the extra field.  As a side effect, extract_version
        and create_version are raised to the minimum the entry requires
        (ZIP64 sizes and/or bzip2 compression).
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        ASCII names are encoded as ASCII with the flags unchanged; any
        other name is encoded as UTF-8 and bit 0x800 is set in the flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 sizes/offset stored in the extra field, if any.

        Walks the (type, length, payload) records in self.extra.  For a
        type-1 record, each of file_size, compress_size and header_offset
        that currently holds the 0xffffffff placeholder is replaced, in
        that order, by the next 64-bit value of the record.
        Raises RuntimeError for a type-1 record with an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # A record header needs 4 bytes; stop on a shorter trailing
        # fragment instead of failing inside struct.unpack.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000420
421
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for value in range(256):
            # Eight shift/xor rounds per byte value.
            for _ in range(8):
                shifted = (value >> 1) & 0x7FFFFFFF
                value = (shifted ^ poly) if value & 1 else shifted
            table.append(value)
        return table
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ch) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        """Seed the three keys and mix in every item of *pwd*."""
        self.key0, self.key1, self.key2 = 305419896, 591751049, 878082192
        for item in pwd:
            self._UpdateKeys(item)

    def _UpdateKeys(self, c):
        """Advance the three keys using the byte value *c*."""
        self.key0 = self._crc32(c, self.key0)
        summed = (self.key1 + (self.key0 & 255)) & 4294967295
        self.key1 = (summed * 134775813 + 1) & 4294967295
        self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        c ^= ((k * (k ^ 1)) >> 8) & 255
        # The keys are updated with the decrypted value.
        self._UpdateKeys(c)
        return c
479
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200480
def _check_compression(compression):
    """Validate that *compression* is a method this module can use.

    Returns None for ZIP_STORED, and for ZIP_DEFLATED / ZIP_BZIP2 when
    the corresponding module was imported.  Raises RuntimeError when the
    needed module is missing or the method is not one of those three.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        return
    if compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
        return
    raise RuntimeError("That compression method is not supported")
494
495
def _get_compressor(compress_type):
    """Return a new compressor object for *compress_type*, or None.

    ZIP_DEFLATED yields a zlib compressor created with window bits -15,
    ZIP_BZIP2 a bz2 compressor; every other value yields None.
    """
    if compress_type == ZIP_DEFLATED:
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    return None
504
505
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    ZIP_STORED needs no decompressor and yields None.  Any method other
    than stored, deflate and bzip2 raises NotImplementedError; the
    message includes the method's name when it is a known one.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()

    # Methods that are recognised by number but not implemented here.
    unknown_compressors = {
        1: 'shrink',
        2: 'reduce',
        3: 'reduce',
        4: 'reduce',
        5: 'reduce',
        6: 'implode',
        9: 'enhanced deflate',
        10: 'implode',
        14: 'lzma',
    }
    descr = unknown_compressors.get(compress_type)
    if not descr:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200530
531
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000532class ZipExtFile(io.BufferedIOBase):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000533 """File-like object for reading an archive member.
534 Is returned by ZipFile.open().
535 """
536
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000537 # Max size supported by decompressor.
538 MAX_N = 1 << 31 - 1
Guido van Rossumd8faa362007-04-27 19:54:29 +0000539
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000540 # Read from compressed files in 4k blocks.
541 MIN_READ_SIZE = 4096
Guido van Rossumd8faa362007-04-27 19:54:29 +0000542
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000543 # Search for universal newlines or line chunks.
544 PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
545
def __init__(self, fileobj, mode, zipinfo, decrypter=None,
             close_fileobj=False):
    """Set up reading of one archive member.

    fileobj       -- file object the member's raw data is read from.
    mode          -- mode string; a 'U' in it enables universal newlines.
    zipinfo       -- entry describing the member (compression type,
                     sizes, file name and, when present, CRC).
    decrypter     -- optional decrypter; when given, 12 bytes are
                     subtracted from the compressed size for the
                     encryption/password information.
    close_fileobj -- stored as-is in _close_fileobj.
    """
    self._fileobj = fileobj
    self._decrypter = decrypter
    self._close_fileobj = close_fileobj

    self._compress_type = zipinfo.compress_type
    self._compress_left = zipinfo.compress_size
    self._left = zipinfo.file_size

    self._decompressor = _get_decompressor(self._compress_type)

    # Read buffer state.
    self._eof = False
    self._readbuffer = b''
    self._offset = 0

    self._universal = 'U' in mode
    self.newlines = None

    # Adjust read size for encrypted files since the first 12 bytes
    # are for the encryption/password information.
    if self._decrypter is not None:
        self._compress_left = self._compress_left - 12

    self.mode = mode
    self.name = zipinfo.filename

    # Without a reference CRC there is nothing to verify against.
    try:
        self._expected_crc = zipinfo.CRC
    except AttributeError:
        self._expected_crc = None
    else:
        self._running_crc = crc32(b'') & 0xffffffff
578
def readline(self, limit=-1):
    """Read and return a line from the stream.

    If limit is specified, at most limit bytes will be read.

    In universal-newlines mode every line ending found is returned as
    b'\\n' and recorded in self.newlines.
    """
    if not self._universal:
        if limit < 0:
            # Shortcut common case - newline found in buffer.
            end = self._readbuffer.find(b'\n', self._offset) + 1
            if end > 0:
                begin = self._offset
                self._offset = end
                return self._readbuffer[begin:end]
        return io.BufferedIOBase.readline(self, limit)

    collected = b''
    while limit < 0 or len(collected) < limit:
        readahead = self.peek(2)
        if readahead == b'':
            return collected

        #
        # Search for universal newlines or line chunks.
        #
        # The pattern returns either a line chunk or a newline, but not
        # both. Combined with peek(2), we are assured that the sequence
        # '\r\n' is always retrieved completely and never split into
        # separate newlines - '\r', '\n' due to coincidental readaheads.
        #
        match = self.PATTERN.search(readahead)
        newline = match.group('newline')
        if newline is not None:
            if self.newlines is None:
                self.newlines = []
            if newline not in self.newlines:
                self.newlines.append(newline)
            self._offset += len(newline)
            return collected + b'\n'

        piece = match.group('chunk')
        if limit >= 0:
            piece = piece[: limit - len(collected)]

        self._offset += len(piece)
        collected += piece

    return collected
628
def peek(self, n=1):
    """Returns buffered bytes without advancing the position."""
    if n > len(self._readbuffer) - self._offset:
        chunk = self.read(n)
        got = len(chunk)
        # read() moved the position; put the bytes back in front of it.
        if got <= self._offset:
            self._offset -= got
        else:
            self._readbuffer = chunk + self._readbuffer[self._offset:]
            self._offset = 0

    # Return up to 512 bytes to reduce allocation overhead for tight loops.
    start = self._offset
    return self._readbuffer[start:start + 512]
641
def readable(self):
    """Report that this stream can be read from (always True)."""
    return True
644
def read(self, n=-1):
    """Read and return up to n bytes.

    If the argument is omitted, None, or negative, data is read and
    returned until EOF is reached.
    """
    if n is None or n < 0:
        pieces = [self._readbuffer[self._offset:]]
        self._readbuffer = b''
        self._offset = 0
        while not self._eof:
            pieces.append(self._read1(self.MAX_N))
        return b''.join(pieces)

    buffered = len(self._readbuffer) - self._offset
    if n < buffered:
        # The request is satisfied entirely from the buffer.
        stop = self._offset + n
        buf = self._readbuffer[self._offset:stop]
        self._offset = stop
        return buf

    missing = n - buffered
    pieces = [self._readbuffer[self._offset:]]
    self._readbuffer = b''
    self._offset = 0
    while missing > 0 and not self._eof:
        data = self._read1(missing)
        if missing < len(data):
            # More arrived than requested; keep the surplus buffered.
            self._readbuffer = data
            self._offset = missing
            pieces.append(data[:missing])
            break
        pieces.append(data)
        missing -= len(data)
    return b''.join(pieces)
676
def _update_crc(self, newdata):
    """Fold *newdata* into the running CRC and verify it at end of file.

    Does nothing when no expected CRC is known.  Raises BadZipFile when
    the end of the member has been reached and the CRCs differ.
    """
    expected = self._expected_crc
    if expected is None:
        # No need to compute the CRC if we don't have a reference value
        return
    self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
    # Check the CRC if we're at the end of the file
    if self._eof and self._running_crc != expected:
        raise BadZipFile("Bad CRC-32 for file %r" % self.name)
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000686
def read1(self, n):
    """Read up to n bytes, normally with at most one read() system call.

    If n is None or negative, whatever is buffered plus the result of
    one successful low-level read is returned.

    An empty result is only returned at end of file: when a low-level
    read produces no output (the decompressor may need more input before
    it can emit anything), the read is repeated until data arrives or
    the end of the member is reached.
    """
    if n is None or n < 0:
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while not self._eof:
            data = self._read1(self.MAX_N)
            if data:
                buf += data
                break
        return buf

    buffered = len(self._readbuffer) - self._offset
    if n < buffered:
        # Served entirely from the buffer.
        stop = self._offset + n
        buf = self._readbuffer[self._offset:stop]
        self._offset = stop
        return buf

    wanted = n - buffered
    buf = self._readbuffer[self._offset:]
    self._readbuffer = b''
    self._offset = 0
    if wanted > 0:
        while not self._eof:
            data = self._read1(wanted)
            if wanted < len(data):
                # Keep the surplus for the next call.
                self._readbuffer = data
                self._offset = wanted
                buf += data[:wanted]
                break
            if data:
                buf += data
                break
    return buf
715
def _read1(self, n):
    """Fetch, decrypt and decompress one chunk of member data.

    Reads up to n compressed bytes through ``_read2`` (a single
    underlying read), runs them through the decompressor when the member
    is not stored, trims the result to the bytes still expected
    (``self._left``), updates the end-of-file flag and the running CRC,
    and returns the decoded bytes.
    """
    if self._eof or n <= 0:
        return b''

    method = self._compress_type

    # Gather the compressed input.
    if method == ZIP_DEFLATED:
        # Start from input the decompressor has not consumed yet.
        data = self._decompressor.unconsumed_tail
        missing = n - len(data)
        if missing > 0:
            data += self._read2(missing)
    else:
        data = self._read2(n)

    # Decode it and work out whether the member is exhausted.
    if method == ZIP_STORED:
        self._eof = self._compress_left <= 0
    elif method == ZIP_DEFLATED:
        n = max(n, self.MIN_READ_SIZE)
        data = self._decompressor.decompress(data, n)
        self._eof = (self._decompressor.eof or
                     self._compress_left <= 0 and
                     not self._decompressor.unconsumed_tail)
        if self._eof:
            data += self._decompressor.flush()
    else:
        data = self._decompressor.decompress(data)
        self._eof = self._decompressor.eof or self._compress_left <= 0

    # Never hand out more than the bytes still expected.
    data = data[:self._left]
    self._left -= len(data)
    if self._left <= 0:
        self._eof = True
    self._update_crc(data)
    return data
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000751
def _read2(self, n):
    """Read raw (still compressed) bytes of the member and decrypt them.

    The request is raised to at least ``self.MIN_READ_SIZE`` and capped
    at the number of compressed bytes left.  Returns b'' once no
    compressed bytes remain.
    """
    remaining = self._compress_left
    if remaining <= 0:
        return b''

    size = min(max(n, self.MIN_READ_SIZE), remaining)
    raw = self._fileobj.read(size)
    self._compress_left -= len(raw)

    decrypt = self._decrypter
    if decrypt is None:
        return raw
    # The decrypter maps one byte value to one byte value.
    return bytes(map(decrypt, raw))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000765
def close(self):
    """Close this member stream.

    The underlying file object is closed as well when this stream was
    created with ``close_fileobj`` set; the base-class close() runs
    even if closing the underlying file raises.
    """
    owns_fileobj = self._close_fileobj
    try:
        if owns_fileobj:
            self._fileobj.close()
    finally:
        super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000772
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000773
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000774class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000775 """ Class with methods to open, read, write, close, list zip files.
776
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000777 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000778
Fred Drake3d9091e2001-03-26 15:49:24 +0000779 file: Either the path to the file, or a file-like object.
780 If it is a path, the file will be opened and closed by ZipFile.
781 mode: The mode can be either read "r", write "w" or append "a".
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200782 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib) or
783 ZIP_BZIP2 (requires bz2).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000784 allowZip64: if True ZipFile will create files with ZIP64 extensions when
785 needed, otherwise it will raise an exception when this would
786 be necessary.
787
Fred Drake3d9091e2001-03-26 15:49:24 +0000788 """
Fred Drake484d7352000-10-02 21:14:52 +0000789
Fred Drake90eac282001-02-28 05:29:34 +0000790 fp = None # Set here since __del__ checks it
791
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file: path of the archive, or an already open file-like object.
    mode: "r", "w" or "a".  Appending to a path that cannot be opened
        for update falls back to creating it.
    compression: compression method used for members by default.
    allowZip64: whether ZIP64 extensions may be written when needed.

    Raises RuntimeError for an unsupported mode and BadZipFile when an
    archive opened for reading is not a valid ZIP file.  Whenever set-up
    fails, a file that was opened here (rather than passed in) is closed
    before the exception propagates.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self._comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = io.open(file, modeDict[mode])
        except IOError:
            if mode == 'a':
                # Nothing to append to: create the archive instead.
                mode = key = 'w'
                self.fp = io.open(file, modeDict[mode])
            else:
                raise
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    try:
        if key == 'r':
            self._GetContents()
        elif key == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        elif key == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')
    except BaseException:
        # Do not leak the handle we opened ourselves, whatever went wrong
        # (not only BadZipFile).  _GetContents may already have cleared
        # self.fp, hence the None check.
        fp = self.fp
        self.fp = None
        if fp is not None and not self._filePassed:
            fp.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000852
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is the target."""
    return self
855
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    The exception details are ignored and None is returned, so an
    exception raised inside the block keeps propagating.
    """
    self.close()
858
def _GetContents(self):
    """Load the archive directory, cleaning up if the format is bad.

    On BadZipFile a file that this object opened itself is closed and
    ``self.fp`` is reset to None before the exception is re-raised.
    """
    try:
        self._RealGetContents()
    except BadZipFile:
        opened_here = not self._filePassed
        if opened_here:
            self.fp.close()
            self.fp = None
        raise
869
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record, stores the archive
    comment and the start of the central directory (``self.start_dir``),
    then parses every central directory entry into a ZipInfo that is
    appended to ``self.filelist`` and indexed in ``self.NameToInfo``.

    Raises BadZipFile when no end record is found, when an entry does
    not start with the central directory signature, or when an entry is
    shorter than a full central directory record.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except IOError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory rather than from the archive file.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # A short record would otherwise surface as struct.error.
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                        t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        # Offsets in the directory ignore any data prepended to the zip.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000941
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000942
def namelist(self):
    """Return the names of all archive members, in directory order."""
    names = []
    for entry in self.filelist:
        names.append(entry.filename)
    return names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000946
def infolist(self):
    """Return the list of ZipInfo objects describing the archive members.

    This is the archive's own list object, not a copy.
    """
    entries = self.filelist
    return entries
951
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header row is followed by one row per member (name,
    modification time, uncompressed size).  Output goes to *file*,
    which is handed to print() unchanged.
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000960
def testzip(self):
    """Read all the files and check the CRC.

    Returns the name of the first member whose data raises BadZipFile
    while being read, or None when every member reads cleanly.
    """
    chunk_size = 2 ** 20
    for zinfo in self.filelist:
        try:
            # Read by chunks, to avoid an OverflowError or a
            # MemoryError with very large embedded files.  The member
            # stream is closed again even when the check fails.
            with self.open(zinfo.filename, "r") as f:
                while f.read(chunk_size):     # Check CRC-32
                    pass
        except BadZipFile:
            return zinfo.filename
973
def getinfo(self, name):
    """Look up the ZipInfo object for the member called *name*.

    Raises KeyError when the archive has no such member.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError('There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000982
def setpassword(self, pwd):
    """Set the default password used for encrypted members.

    A false value (None, b'') clears the default password.  Any other
    value must be a bytes object, otherwise TypeError is raised.
    """
    if not pwd:
        self.pwd = None
        return
    if not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    self.pwd = pwd
Thomas Wouterscf297e42007-02-23 15:07:44 +0000991
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    """Set the archive comment and mark the archive as modified.

    *comment* must be a bytes object (TypeError otherwise).  A comment
    longer than ZIP_MAX_COMMENT bytes is truncated to that length; a
    notice is printed when ``self.debug`` is set.
    """
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment))
    # check for valid comment length; a comment of exactly
    # ZIP_MAX_COMMENT bytes still fits and must not be reported as
    # truncated.
    if len(comment) > ZIP_MAX_COMMENT:
        if self.debug:
            print('Archive comment is too long; truncating to %d bytes'
                    % ZIP_MAX_COMMENT)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    self._didModify = True
1009
def read(self, name, pwd=None):
    """Return the complete contents of member *name* as bytes.

    *pwd* is passed on to open() for encrypted members.  The member
    stream is closed before returning.
    """
    with self.open(name, "r", pwd) as member:
        payload = member.read()
    return payload
Guido van Rossumd8faa362007-04-27 19:54:29 +00001014
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name: member name or ZipInfo object.
    mode: "r", "U" or "rU".
    pwd: password (bytes) for encrypted members; falls back to the
        archive's default password.

    Raises RuntimeError for a bad mode, a closed archive, a missing
    password or a wrong password; TypeError when pwd is not bytes;
    KeyError for an unknown member name; BadZipFile when the local file
    header is damaged or disagrees with the central directory; and
    NotImplementedError for compressed patched data.  When the archive
    was opened from a path, the extra file handle opened here is closed
    again on every failure.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    if not self.fp:
        raise RuntimeError(
              "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")
        if len(fheader) != sizeFileHeader:
            # A short header would otherwise surface as struct.error.
            raise BadZipFile("Truncated file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                  'File name in directory %r and header %r differ.'
                  % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = list(map(zd, header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd,
                          close_fileobj=not self._filePassed)
    except BaseException:
        # Single cleanup point: never leak the handle opened above,
        # whichever check failed.  A caller-supplied file is left open.
        if not self._filePassed:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001106
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the path it was written to.

    `member' may be a member name or a ZipInfo object.  The member is
    extracted under its full name below `path', which defaults to the
    current working directory.  `pwd' is the password for encrypted
    members.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
1119 return self._extract_member(member, path, pwd)
1120
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() for each of them.

    `members' defaults to everything returned by namelist(); `path'
    and `pwd' are passed through to extract() unchanged.
    """
    selected = self.namelist() if members is None else members
    for item in selected:
        self.extract(item, path, pwd)
1132
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

    The member name is treated as untrusted: a drive prefix, leading
    separators and '.' / '..' components are dropped, so the result is
    always created below targetpath.  Missing parent directories are
    created.  A member whose name ends in '/' becomes a directory.
    Returns the normalised path that was created.
    """
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
        and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # Build the destination pathname, replacing forward slashes with
    # platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # Drop anything that could redirect the write outside targetpath:
    # drive letters, empty components (leading or doubled separators)
    # and current/parent directory references.
    arcname = os.path.splitdrive(arcname)[1]
    arcname = os.path.sep.join(
        part for part in arcname.split(os.path.sep)
        if part not in ('', os.path.curdir, os.path.pardir))

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    if member.filename[-1:] == '/':
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    # Both files are closed even if copying fails part-way.
    with self.open(member, pwd=pwd) as source, \
            open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1169
def _writecheck(self, zinfo):
    """Validate *zinfo* before it is written to the archive.

    Prints a notice for a duplicate member name when ``self.debug`` is
    set.  Raises RuntimeError when the archive is not writable or is
    already closed, and LargeZipFile when the member size or its header
    offset exceeds ZIP64_LIMIT while ZIP64 is not allowed.  The
    compression type is validated by _check_compression().
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("Duplicate name:", zinfo.filename)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)
    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile(
              "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001188
def write(self, filename, arcname=None, compress_type=None):
    """Add the file or directory *filename* to the archive.

    The member is stored as *arcname* (default: *filename*) with any
    drive prefix and leading separators removed; directories get a
    trailing '/'.  *compress_type* overrides the archive's default
    compression for this member.  Raises RuntimeError when the archive
    is already closed.
    """
    if not self.fp:
        raise RuntimeError(
              "Attempt to write to ZIP archive that was already closed")

    file_stat = os.stat(filename)
    is_directory = stat.S_ISDIR(file_stat.st_mode)
    timestamp = time.localtime(file_stat.st_mtime)[0:6]

    # Create ZipInfo instance to store file information
    member_name = filename if arcname is None else arcname
    member_name = os.path.normpath(os.path.splitdrive(member_name)[1])
    while member_name[0] in (os.sep, os.altsep):
        member_name = member_name[1:]
    if is_directory:
        member_name += '/'
    zinfo = ZipInfo(member_name, timestamp)
    zinfo.external_attr = (file_stat[0] & 0xFFFF) << 16     # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)

    zinfo.file_size = file_stat.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if is_directory:
        # Directories are header-only entries.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    compressor = _get_compressor(zinfo.compress_type)
    with open(filename, "rb") as src:
        # Placeholder CRC and sizes; the real values are patched into
        # the header once the data has been written.
        zinfo.CRC = checksum = 0
        zinfo.compress_size = packed_total = 0
        zinfo.file_size = raw_total = 0
        self.fp.write(zinfo.FileHeader())
        while True:
            chunk = src.read(1024 * 8)
            if not chunk:
                break
            raw_total += len(chunk)
            checksum = crc32(chunk, checksum) & 0xffffffff
            if compressor:
                chunk = compressor.compress(chunk)
                packed_total += len(chunk)
            self.fp.write(chunk)
    if compressor:
        tail = compressor.flush()
        packed_total += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed_total
    else:
        zinfo.compress_size = raw_total
    zinfo.CRC = checksum
    zinfo.file_size = raw_total
    # Seek backwards and write CRC and file sizes
    resume_at = self.fp.tell()      # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
          zinfo.file_size))
    self.fp.seek(resume_at, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1265
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write in-memory *data* into the archive as one member.

    'data' may be a 'str' or a 'bytes' instance; a 'str' is encoded as
    UTF-8 first.  'zinfo_or_arcname' is either a ZipInfo instance or the
    name of the file in the archive; for a plain name the member gets
    the current local time, the archive's default compression and
    permission bits 0o600.  'compress_type' overrides the compression
    method.  Raises RuntimeError when the archive is already closed.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")

    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        now = time.localtime(time.time())[:6]
        zinfo = ZipInfo(filename=zinfo_or_arcname, date_time=now)
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
              "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)             # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header data
    if compress_type is not None:
        zinfo.compress_type = compress_type

    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum

    compressor = _get_compressor(zinfo.compress_type)
    if compressor:
        data = compressor.compress(data) + compressor.flush()
        zinfo.compress_size = len(data)     # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size

    zinfo.header_offset = self.fp.tell()    # Start of header data
    out = self.fp
    out.write(zinfo.FileHeader())
    out.write(data)
    out.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        trailer = struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size)
        out.write(trailer)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1310
def __del__(self):
    """Call the "close()" method in case the user forgot.

    Invoked when the object is finalized; it simply delegates to
    close(), which returns immediately when self.fp is already None.
    """
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001314
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing when the archive is already closed (self.fp is None).
    When the archive was opened in mode "w" or "a" and has been
    modified, the central directory is written (one record per entry in
    self.filelist), followed by the ZIP64 end-of-archive records when
    the entry count, central directory size or central directory offset
    exceed the classic limits, and finally the end-of-archive record
    and the archive comment.

    The underlying file object is always released, even when writing
    the ending records raises: self.fp is reset to None and, unless the
    file object was passed in by the caller (self._filePassed), it is
    closed.  Any exception raised while writing still propagates.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:  # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                # Pack the (year, month, day, hour, minute, second) tuple
                # into the two 16-bit date/time fields; seconds are stored
                # with two-second resolution.
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                # Values that do not fit in the 32-bit fields are collected
                # here and the fixed-size field is set to 0xffffffff.
                extra = []
                if (zinfo.file_size > ZIP64_LIMIT
                        or zinfo.compress_size > ZIP64_LIMIT):
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                min_version = 0
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    min_version = ZIP64_VERSION

                if zinfo.compress_type == ZIP_BZIP2:
                    min_version = max(BZIP2_VERSION, min_version)

                extract_version = max(min_version, zinfo.extract_version)
                create_version = max(min_version, zinfo.create_version)
                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Diagnostic aid: dump the values handed to struct.pack
                    # before re-raising.
                    print((structCentralDir, stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset), file=sys.stderr)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                centDirOffset > ZIP64_LIMIT or
                centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, centDirCount, centDirCount,
                        centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # The classic record below can only hold the clamped values.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self._comment))
            self.fp.write(endrec)
            self.fp.write(self._comment)
            self.fp.flush()
    finally:
        # Always release the file object, even if writing the ending
        # records failed, so the handle is not leaked and later close()
        # calls (e.g. from __del__) are no-ops.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1418
1419
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=False, optimize=-1):
        """Open the archive; see ZipFile for the common arguments.

        optimize selects which byte-compiled file writepy() adds:
        -1 (the default) is the legacy mode that uses whatever up-to-date
        compiled file is present, 0 selects the .pyc file, and any other
        value selects the .pyo file, compiling with that optimization
        level when needed.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are normally module.pyo or module.pyc;
        this method will compile module.py if necessary.  When the
        compilation fails, the module.py source file is added instead.

        basename is the archive directory prefix under which the entries
        are stored.  RuntimeError is raised when pathname is a file whose
        name does not end with ".py".
        """
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                # __init__.py has just been added; do not add it twice.
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path (the source path without its ".py"
        extension), return the path of the file to store and the name to
        store it under, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).
        The archive name is prefixed with "basename/" when basename is
        non-empty.  When compilation fails, the ".py" source file itself
        is returned.
        """
        def _compile(file, optimize=-1):
            # Byte-compile *file*; return True on success.  On a compile
            # error the message is printed and False is returned so the
            # caller can fall back to the source file.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = imp.cache_from_source(file_py, True)
        pycache_pyo = imp.cache_from_source(file_py, False)
        if self._optimize == -1:
            # legacy mode: use whatever file is present
            if (os.path.isfile(file_pyo) and
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime):
                # Use .pyo file.
                arcname = fname = file_pyo
            elif (os.path.isfile(file_pyc) and
                  os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
                # Use .pyc file.
                arcname = fname = file_pyc
            elif (os.path.isfile(pycache_pyc) and
                  os.stat(pycache_pyc).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
                # file name in the archive.
                fname = pycache_pyc
                arcname = file_pyc
            elif (os.path.isfile(pycache_pyo) and
                  os.stat(pycache_pyo).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyo file, but write it to the legacy pyo
                # file name in the archive.
                fname = pycache_pyo
                arcname = file_pyo
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    fname = (pycache_pyc if __debug__ else pycache_pyo)
                    arcname = (file_pyc if __debug__ else file_pyo)
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_pyc
                arcname = file_pyc
            else:
                fname = pycache_pyo
                arcname = file_pyo
            if not (os.path.isfile(fname) and
                    os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001562
1563
def main(args = None):
    """Command-line interface: list, test, extract or create a ZIP file.

    args is the argument list (defaults to sys.argv[1:]).  The first
    argument selects the action (-l, -t, -e or -c).  On a missing or
    unknown action, or a wrong number of arguments, the usage text is
    printed and the process exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            # The archive used to be left open in this branch.
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            # NOTE(review): member names are joined to the target directory
            # without sanitization, so absolute names or ".." components in
            # an untrusted archive can escape it.
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # Directory entry: create the directory instead of
                    # trying to open it as a file.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is '' when extracting a top-level member into the
                # current directory; there is nothing to create then.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively the contents of a directory;
            # anything else is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1637
# Run the command-line interface when this file is executed as a script;
# main() reads its arguments from sys.argv.
if __name__ == "__main__":
    main()