blob: fad52a246d5a194f91939bde3c17826760a827dc [file] [log] [blame]
"""
Read and write ZIP files.

XXX references to utf-8 need further investigation.
"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Brett Cannonb57a0852013-06-15 17:32:30 -04009import importlib.util
Barry Warsaw28a691b2010-04-17 00:19:56 +000010import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
18try:
Tim Peterse1190062001-01-15 03:34:38 +000019 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000020 crc32 = zlib.crc32
Brett Cannon260fbe82013-07-04 18:16:15 -040021except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000022 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000023 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020025try:
26 import bz2 # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040027except ImportError:
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020028 bz2 = None
29
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020030try:
31 import lzma # We may need its compression method
Brett Cannon260fbe82013-07-04 18:16:15 -040032except ImportError:
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +020033 lzma = None
34
# Public names exported by a star-import of this module.
__all__ = [
    # exceptions (including the pre-3.2 spellings)
    "BadZipFile", "BadZipfile", "error",
    # compression method constants
    "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
    # functions and classes
    "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
]
Skip Montanaro40fc1602001-03-01 04:27:19 +000038
class BadZipFile(Exception):
    """Raised by this module for archives it cannot process."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +000041
42
class LargeZipFile(Exception):
    """Raised while writing when ZIP64 extensions are needed but disabled.

    The archive being written requires the ZIP64 extensions, and the
    caller has not allowed them.
    """
48
# Pre-3.2 compatibility names: both are plain aliases of BadZipFile.
BadZipfile = BadZipFile
error = BadZipFile
50
Guido van Rossum32abe6f2000-03-31 17:30:02 +000051
# Numeric limits used by this module.
ZIP64_LIMIT = 0x7FFFFFFF            # (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 0x10000       # 1 << 16
ZIP_MAX_COMMENT = 0xFFFF            # (1 << 16) - 1

# Compression method identifiers understood by this module.
# Other ZIP compression methods are not supported.
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14

# Format version numbers written into headers.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# We recognize (but do not necessarily support) all features up to this
# version.
MAX_EXTRACT_VERSION = 63
Martin v. Löwisf6b16a42012-05-01 07:58:44 +020069
# Formats and associated data for reading/writing headers with the struct
# module.  The names and layouts of the headers/records are those used in the
# PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
#     (URL valid as of January 2008)

# "End of central directory" record (section V.I in the format document):
# struct format, magic number and packed size.
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields in an unpacked "end of central directory" record.
# The last two positions are not part of the structure as defined in the
# spec; they are appended internally by this module as a convenience.
(
    _ECD_SIGNATURE,
    _ECD_DISK_NUMBER,
    _ECD_DISK_START,
    _ECD_ENTRIES_THIS_DISK,
    _ECD_ENTRIES_TOTAL,
    _ECD_SIZE,
    _ECD_OFFSET,
    _ECD_COMMENT_SIZE,
    _ECD_COMMENT,
    _ECD_LOCATION,
) = range(10)

# "Central directory" entry (section V.F in the format document):
# struct format, magic number and packed size.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# Positions of the fields in an unpacked central directory entry.
(
    _CD_SIGNATURE,
    _CD_CREATE_VERSION,
    _CD_CREATE_SYSTEM,
    _CD_EXTRACT_VERSION,
    _CD_EXTRACT_SYSTEM,
    _CD_FLAG_BITS,
    _CD_COMPRESS_TYPE,
    _CD_TIME,
    _CD_DATE,
    _CD_CRC,
    _CD_COMPRESSED_SIZE,
    _CD_UNCOMPRESSED_SIZE,
    _CD_FILENAME_LENGTH,
    _CD_EXTRA_FIELD_LENGTH,
    _CD_COMMENT_LENGTH,
    _CD_DISK_NUMBER_START,
    _CD_INTERNAL_FILE_ATTRIBUTES,
    _CD_EXTERNAL_FILE_ATTRIBUTES,
    _CD_LOCAL_HEADER_OFFSET,
) = range(19)

# "Local file header" (section V.A in the format document):
# struct format, magic number and packed size.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields in an unpacked local file header.
(
    _FH_SIGNATURE,
    _FH_EXTRACT_VERSION,
    _FH_EXTRACT_SYSTEM,
    _FH_GENERAL_PURPOSE_FLAG_BITS,
    _FH_COMPRESSION_METHOD,
    _FH_LAST_MOD_TIME,
    _FH_LAST_MOD_DATE,
    _FH_CRC,
    _FH_COMPRESSED_SIZE,
    _FH_UNCOMPRESSED_SIZE,
    _FH_FILENAME_LENGTH,
    _FH_EXTRA_FIELD_LENGTH,
) = range(12)

# "Zip64 end of central directory locator": struct format, magic number
# and packed size.
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# "Zip64 end of central directory" record (section V.G in the format
# document): struct format, magic number and packed size.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields in an unpacked Zip64 end-of-central-directory
# record.
(
    _CD64_SIGNATURE,
    _CD64_DIRECTORY_RECSIZE,
    _CD64_CREATE_VERSION,
    _CD64_EXTRACT_VERSION,
    _CD64_DISK_NUMBER,
    _CD64_DISK_NUMBER_START,
    _CD64_NUMBER_ENTRIES_THIS_DISK,
    _CD64_NUMBER_ENTRIES_TOTAL,
    _CD64_DIRECTORY_SIZE,
    _CD64_OFFSET_START_CENTDIR,
) = range(10)
162
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An OSError raised while probing the file is treated the same as a
    missing record: the function returns False.
    """
    try:
        endrec = _EndRecData(fp)
    except OSError:
        return False
    # A truthy record means the file has the correct magic number.
    return True if endrec else False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000170
def is_zipfile(filename):
    """Quickly check whether a file is a ZIP file via its magic number.

    *filename* may be a path, or an object with a ``read`` attribute, which
    is then probed directly.  An OSError while opening or probing yields
    False.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and make sure it gets closed again.
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except OSError:
        pass
    return verdict
186
def _EndRecData64(fpin, offset, endrec):
    """Update *endrec* from the ZIP64 end-of-archive records, if present.

    fpin   -- file object to read from.
    offset -- position of the regular "end of central directory" record,
              relative to the end of the file (used with whence=2).
    endrec -- list built from the regular end record; modified in place.

    Returns *endrec*, unchanged when no valid ZIP64 locator/record pair is
    found.  Raises BadZipFile when the locator describes a multi-disk
    archive.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator,
                                               locator)
    if sig != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks == 1):
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data': the record sits directly before
    # the locator.
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    (sig, sz, create_version, read_version, disk_num, disk_dir,
     dircount, dircount2, dirsize, diroffset) = struct.unpack(
        structEndArchive64, record)
    if sig != stringEndArchive64:
        return endrec

    # Overwrite the fields of the original endrec with the ZIP64 values.
    replacements = (
        (_ECD_SIGNATURE, sig),
        (_ECD_DISK_NUMBER, disk_num),
        (_ECD_DISK_START, disk_dir),
        (_ECD_ENTRIES_THIS_DISK, dircount),
        (_ECD_ENTRIES_TOTAL, dircount2),
        (_ECD_SIZE, dirsize),
        (_ECD_OFFSET, diroffset),
    )
    for index, value in replacements:
        endrec[index] = value
    return endrec
228
229
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list holding the unpacked fields of the ZIP "End of
    central dir" record, followed by the archive comment and the file
    offset of the record.  The list is passed through _EndRecData64 so that
    ZIP64 values replace the regular ones when present.
    """
    # Determine file size.
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # First guess: no archive comment, so the "end of central directory"
    # structure is the very last item in the file.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except OSError:
        return None
    tail = fpin.read()
    looks_like_bare_record = (len(tail) == sizeEndCentDir
                              and tail[0:4] == stringEndArchive
                              and tail[-2:] == b"\000\000")
    if looks_like_bare_record:
        # The signature is correct and there's no comment: unpack, then
        # append a blank comment and the record's start offset.
        endrec = list(struct.unpack(structEndArchive, tail))
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure.
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the record signature.  The
    # comment is the last item in the ZIP file and may be up to 64K long.
    # It is assumed that the magic number does not appear in the comment.
    search_from = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(search_from, 0)
    window = fpin.read()
    pos = window.rfind(stringEndArchive)
    if pos < 0:
        # Unable to find a valid end of central directory structure.
        return None

    # Found the magic number; attempt to unpack and interpret.
    record_end = pos + sizeEndCentDir
    raw = window[pos:record_end]
    if len(raw) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, raw))
    comment_size = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    endrec.append(window[record_end:record_end + comment_size])
    endrec.append(search_from + pos)

    # Try to read the "Zip64 end of central directory" structure.
    return _EndRecData64(fpin, search_from + pos - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000289
Fred Drake484d7352000-10-02 21:14:52 +0000290
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry with the given name, timestamp and default values.

        filename  -- member name.  It is cut at the first NUL character and
                     os.sep is replaced by "/"; the untouched value is kept
                     in ``orig_filename``.
        date_time -- sequence (year, month, day, hour, min, sec).

        Raises ValueError if the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self, zip64=None):
        """Return the per-file header as a bytes object.

        zip64 -- True: always append a ZIP64 extra record;
                 False: never append one, and raise LargeZipFile if a size
                 exceeds ZIP64_LIMIT;
                 None (default): append one only if a size exceeds
                 ZIP64_LIMIT.

        As a side effect, ``extract_version`` and ``create_version`` are
        raised to the minimum version required by the ZIP64 record and/or
        the compression method.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)
        elif self.compress_type == ZIP_LZMA:
            min_version = max(LZMA_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved, flag_bits,
                             self.compress_type, dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing into a header.

        The name is encoded as ASCII when possible; otherwise it is encoded
        as UTF-8 and bit 0x800 is set in the returned flag bits.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Decode the extra field, applying any ZIP64 record it contains.

        Records of type 1 supply 64-bit replacements, in order, for
        ``file_size``, ``compress_size`` and ``header_offset`` whenever the
        corresponding attribute currently holds its "see extra field"
        marker value.

        Raises RuntimeError if a type 1 record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header.  Fewer
        # than four remaining bytes cannot hold a record, so stop there
        # instead of letting struct.unpack fail on the stray bytes.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000445
446
class _ZipDecrypter:
    """Decrypt data stored with the password-based ZIP encryption.

    Known-plaintext attacks exist against this scheme, but it is still
    useful to be able to get data out of files protected with it.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Build the 256-entry CRC-32 lookup table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys.  We noticed that a direct implementation is faster
        than relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for byte in range(256):
            value = byte
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= poly
            table.append(value)
        return table
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ch) & 0xff
        shifted = (crc >> 8) & 0xffffff
        return shifted ^ self.crctable[index]

    def __init__(self, pwd):
        # Fixed initial key state, then mix in each password byte.
        self.key0, self.key1, self.key2 = 305419896, 591751049, 878082192
        for byte in pwd:
            self._UpdateKeys(byte)

    def _UpdateKeys(self, c):
        """Advance the three keys using the plaintext byte *c*."""
        self.key0 = self._crc32(c, self.key0)
        mixed = (self.key1 + (self.key0 & 255)) & 4294967295
        self.key1 = (mixed * 134775813 + 1) & 4294967295
        self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        keystream_byte = ((k * (k ^ 1)) >> 8) & 255
        c = c ^ keystream_byte
        self._UpdateKeys(c)
        return c
504
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200505
class LZMACompressor:
    """Compressor producing a raw LZMA1 stream preceded by a small header.

    The header is emitted once, with the first output of compress() or
    flush(): two bytes (9, 4), the 16-bit little-endian length of the
    filter properties, then the properties themselves.
    """

    def __init__(self):
        # The underlying lzma compressor is created lazily by _init().
        self._comp = None

    def _init(self):
        """Create the underlying compressor and return the stream header."""
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        filter_spec = lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW,
                                         filters=[filter_spec])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        """Compress *data*, prefixing the header on first use."""
        if self._comp is not None:
            return self._comp.compress(data)
        header = self._init()
        return header + self._comp.compress(data)

    def flush(self):
        """Finish the stream, prefixing the header if nothing was written."""
        if self._comp is not None:
            return self._comp.flush()
        header = self._init()
        return header + self._comp.flush()
527
528
class LZMADecompressor:
    """Decompressor for a raw LZMA1 stream preceded by a properties header.

    Input is buffered until the 4-byte header and the filter properties it
    announces (plus at least one further byte) are available; only then is
    the underlying lzma decompressor created.
    """

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        """Return decompressed bytes for *data* (b'' while still buffering)."""
        if self._decomp is None:
            self._unconsumed += data
            pending = self._unconsumed
            if len(pending) <= 4:
                return b''
            # Bytes 2-3 of the header hold the length of the properties.
            psize, = struct.unpack('<H', pending[2:4])
            body_start = 4 + psize
            if len(pending) <= body_start:
                return b''

            filter_spec = lzma._decode_filter_properties(
                lzma.FILTER_LZMA1, pending[4:body_start])
            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                                 filters=[filter_spec])
            data = pending[body_start:]
            # The buffer is no longer needed once the header is consumed.
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result
555
556
# Human-readable names for ZIP compression method numbers, used when
# reporting a method this module cannot decompress.
compressor_names = dict([
    (0, 'store'),
    (1, 'shrink'),
    (2, 'reduce'),
    (3, 'reduce'),
    (4, 'reduce'),
    (5, 'reduce'),
    (6, 'implode'),
    (7, 'tokenize'),
    (8, 'deflate'),
    (9, 'deflate64'),
    (10, 'implode'),
    (12, 'bzip2'),
    (14, 'lzma'),
    (18, 'terse'),
    (19, 'lz77'),
    (97, 'wavpack'),
    (98, 'ppmd'),
])
576
def _check_compression(compression):
    """Raise RuntimeError unless *compression* is usable.

    A method is usable when it is one of the supported ZIP_* constants and
    the module implementing it was importable.  Returns None otherwise.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if zlib:
            return
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if compression == ZIP_BZIP2:
        if bz2:
            return
        raise RuntimeError(
            "Compression requires the (missing) bz2 module")
    if compression == ZIP_LZMA:
        if lzma:
            return
        raise RuntimeError(
            "Compression requires the (missing) lzma module")
    raise RuntimeError("That compression method is not supported")
594
595
def _get_compressor(compress_type):
    """Return a new compressor object for *compress_type*.

    Returns None for any type other than ZIP_DEFLATED, ZIP_BZIP2 and
    ZIP_LZMA.
    """
    if compress_type == ZIP_DEFLATED:
        # Negative window bits: raw deflate stream without zlib framing.
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    if compress_type == ZIP_LZMA:
        return LZMACompressor()
    return None
606
607
def _get_decompressor(compress_type):
    """Return a new decompressor object for *compress_type*.

    Returns None for ZIP_STORED.  Raises NotImplementedError for any type
    other than ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2 and ZIP_LZMA; the message
    includes the method's name when it is listed in compressor_names.
    """
    if compress_type == ZIP_STORED:
        return None
    if compress_type == ZIP_DEFLATED:
        # Negative window bits: raw deflate stream without zlib framing.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    if compress_type == ZIP_LZMA:
        return LZMADecompressor()

    descr = compressor_names.get(compress_type)
    if not descr:
        raise NotImplementedError("compression type %d" % (compress_type,))
    raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200623
624
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000625class ZipExtFile(io.BufferedIOBase):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000626 """File-like object for reading an archive member.
627 Is returned by ZipFile.open().
628 """
629
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000630 # Max size supported by decompressor.
631 MAX_N = 1 << 31 - 1
Guido van Rossumd8faa362007-04-27 19:54:29 +0000632
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000633 # Read from compressed files in 4k blocks.
634 MIN_READ_SIZE = 4096
Guido van Rossumd8faa362007-04-27 19:54:29 +0000635
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000636 # Search for universal newlines or line chunks.
637 PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
638
Łukasz Langae94980a2010-11-22 23:31:26 +0000639 def __init__(self, fileobj, mode, zipinfo, decrypter=None,
640 close_fileobj=False):
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000641 self._fileobj = fileobj
642 self._decrypter = decrypter
Łukasz Langae94980a2010-11-22 23:31:26 +0000643 self._close_fileobj = close_fileobj
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000644
Ezio Melotti92b47432010-01-28 01:44:41 +0000645 self._compress_type = zipinfo.compress_type
Ezio Melotti92b47432010-01-28 01:44:41 +0000646 self._compress_left = zipinfo.compress_size
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200647 self._left = zipinfo.file_size
Ezio Melotti92b47432010-01-28 01:44:41 +0000648
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200649 self._decompressor = _get_decompressor(self._compress_type)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000650
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200651 self._eof = False
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000652 self._readbuffer = b''
653 self._offset = 0
654
655 self._universal = 'U' in mode
656 self.newlines = None
657
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000658 # Adjust read size for encrypted files since the first 12 bytes
659 # are for the encryption/password information.
660 if self._decrypter is not None:
661 self._compress_left -= 12
662
663 self.mode = mode
Guido van Rossumd8faa362007-04-27 19:54:29 +0000664 self.name = zipinfo.filename
665
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000666 if hasattr(zipinfo, 'CRC'):
667 self._expected_crc = zipinfo.CRC
668 self._running_crc = crc32(b'') & 0xffffffff
669 else:
670 self._expected_crc = None
671
def readline(self, limit=-1):
    """Read and return one line from the stream.

    When limit is non-negative, at most limit bytes are returned.  In
    universal-newline mode every line ending ('\\n', '\\r' or '\\r\\n')
    is reported as '\\n' and recorded in self.newlines.
    """
    if not self._universal:
        if limit < 0:
            # Fast path: the newline is already in the buffer.
            end = self._readbuffer.find(b'\n', self._offset) + 1
            if end > 0:
                start, self._offset = self._offset, end
                return self._readbuffer[start:end]
        return io.BufferedIOBase.readline(self, limit)

    pieces = []
    size = 0
    while limit < 0 or size < limit:
        ahead = self.peek(2)
        if ahead == b'':
            break

        # PATTERN yields either a run of non-newline bytes or a single
        # line ending, never both.  Peeking two bytes guarantees that
        # '\r\n' is seen whole instead of as two separate endings.
        found = self.PATTERN.search(ahead)
        ending = found.group('newline')
        if ending is not None:
            if self.newlines is None:
                self.newlines = []
            if ending not in self.newlines:
                self.newlines.append(ending)
            self._offset += len(ending)
            return b''.join(pieces) + b'\n'

        text = found.group('chunk')
        if limit >= 0:
            text = text[: limit - size]

        self._offset += len(text)
        pieces.append(text)
        size += len(text)

    return b''.join(pieces)
721
def peek(self, n=1):
    """Return buffered bytes without advancing the read position.

    If fewer than n bytes are buffered, more are pulled in through
    read() and pushed back so that the position is left unchanged.
    """
    available = len(self._readbuffer) - self._offset
    if n > available:
        fetched = self.read(n)
        pos = self._offset
        if len(fetched) <= pos:
            # The bytes just read are still in the buffer; step back.
            self._offset = pos - len(fetched)
        else:
            self._readbuffer = fetched + self._readbuffer[pos:]
            self._offset = 0

    # Hand out at most 512 bytes to keep allocations small in tight loops.
    start = self._offset
    return self._readbuffer[start:start + 512]
734
def readable(self):
    """Report that this stream supports reading (always True)."""
    return True
737
def read(self, n=-1):
    """Read and return up to n bytes.

    If the argument is omitted, None, or negative, data is read and
    returned until EOF is reached.
    """
    read_all = n is None or n < 0
    if not read_all:
        end = n + self._offset
        if end < len(self._readbuffer):
            # Entirely satisfied from the buffer.
            start, self._offset = self._offset, end
            return self._readbuffer[start:end]
        missing = end - len(self._readbuffer)

    # Drain whatever is buffered, then pull more from the decompressor.
    pieces = [self._readbuffer[self._offset:]]
    self._readbuffer = b''
    self._offset = 0

    if read_all:
        while not self._eof:
            pieces.append(self._read1(self.MAX_N))
    else:
        while missing > 0 and not self._eof:
            data = self._read1(missing)
            if missing < len(data):
                # Got more than asked for: keep the surplus buffered.
                self._readbuffer = data
                self._offset = missing
                pieces.append(data[:missing])
                break
            pieces.append(data)
            missing -= len(data)
    return b''.join(pieces)
770
def _update_crc(self, newdata):
    """Fold newdata into the running CRC-32 and verify it at end of file.

    Does nothing when no reference CRC is available.  Raises BadZipFile
    if the stream is at EOF and the running value differs from the
    expected one.
    """
    expected = self._expected_crc
    if expected is not None:
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # The comparison is only meaningful once all data has been seen.
        if self._eof and self._running_crc != expected:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000780
def read1(self, n=-1):
    """Read up to n bytes with at most one read() system call.

    If n is omitted, None or negative, the buffered bytes are returned
    together with the first non-empty chunk the decompressor produces
    (or just the buffered bytes if EOF is reached first).  The default
    of -1 matches the io.BufferedIOBase.read1() signature.
    """
    if n is None or n < 0:
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        # _read1() may legitimately return b'' before EOF (e.g. while the
        # decompressor is still consuming headers), so retry until data
        # shows up or the stream ends.
        while not self._eof:
            data = self._read1(self.MAX_N)
            if data:
                buf += data
                break
        return buf

    end = n + self._offset
    if end < len(self._readbuffer):
        # Fully satisfied from the buffer.
        buf = self._readbuffer[self._offset:end]
        self._offset = end
        return buf

    n = end - len(self._readbuffer)
    buf = self._readbuffer[self._offset:]
    self._readbuffer = b''
    self._offset = 0
    if n > 0:
        while not self._eof:
            data = self._read1(n)
            if n < len(data):
                # More than requested came back: buffer the surplus.
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            if data:
                buf += data
                break
    return buf
817
def _read1(self, n):
    """Return one decrypted, decompressed chunk of member data.

    Returns b'' immediately when the stream is already at EOF or n <= 0.
    The result is clipped to the number of uncompressed bytes still
    expected (self._left), self._eof is updated, and the chunk is passed
    to _update_crc() before being returned.
    """
    # Read up to n compressed bytes with at most one read() system call,
    # decrypt and decompress them.
    if self._eof or n <= 0:
        return b''

    # Read from file.
    if self._compress_type == ZIP_DEFLATED:
        ## Handle unconsumed data.
        # Input the decompressor left over from the previous call is
        # reused first; the file is only read for the shortfall.
        data = self._decompressor.unconsumed_tail
        if n > len(data):
            data += self._read2(n - len(data))
    else:
        data = self._read2(n)

    if self._compress_type == ZIP_STORED:
        # Stored data is returned as is; EOF is purely a byte count.
        self._eof = self._compress_left <= 0
    elif self._compress_type == ZIP_DEFLATED:
        n = max(n, self.MIN_READ_SIZE)
        data = self._decompressor.decompress(data, n)
        # Done when the decompressor says so, or when the compressed
        # input is exhausted and nothing is left pending.
        self._eof = (self._decompressor.eof or
                     self._compress_left <= 0 and
                     not self._decompressor.unconsumed_tail)
        if self._eof:
            data += self._decompressor.flush()
    else:
        data = self._decompressor.decompress(data)
        self._eof = self._decompressor.eof or self._compress_left <= 0

    # Never hand out more than the declared uncompressed size.
    data = data[:self._left]
    self._left -= len(data)
    if self._left <= 0:
        self._eof = True
    self._update_crc(data)
    return data
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000853
def _read2(self, n):
    """Read and decrypt up to n raw (still compressed) bytes.

    The request is raised to at least MIN_READ_SIZE and capped at the
    number of compressed bytes left for this member.  Returns b'' once
    the member's compressed data has been fully consumed.

    Raises EOFError if the underlying file ends while compressed bytes
    are still expected; without this, callers looping on "not self._eof"
    would spin forever on a truncated archive.
    """
    if self._compress_left <= 0:
        return b''

    n = max(n, self.MIN_READ_SIZE)
    n = min(n, self._compress_left)

    data = self._fileobj.read(n)
    self._compress_left -= len(data)
    if not data:
        # n is positive here, so an empty read means premature end of file.
        raise EOFError

    if self._decrypter is not None:
        # The decrypter maps one byte (int) to one byte.
        data = bytes(map(self._decrypter, data))
    return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000867
def close(self):
    """Close the member stream.

    The underlying file object is closed too when close_fileobj was
    requested at construction; the base-class close() runs in either
    case, even if closing the file object raises.
    """
    try:
        if self._close_fileobj:
            self._fileobj.close()
    finally:
        super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000874
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000875
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000876class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000877 """ Class with methods to open, read, write, close, list zip files.
878
Serhiy Storchaka235c5e02013-11-23 15:55:38 +0200879 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
Tim Petersa19a1682001-03-29 04:36:09 +0000880
Fred Drake3d9091e2001-03-26 15:49:24 +0000881 file: Either the path to the file, or a file-like object.
882 If it is a path, the file will be opened and closed by ZipFile.
883 mode: The mode can be either read "r", write "w" or append "a".
Martin v. Löwis7fb79fc2012-05-13 10:06:36 +0200884 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
885 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000886 allowZip64: if True ZipFile will create files with ZIP64 extensions when
887 needed, otherwise it will raise an exception when this would
888 be necessary.
889
Fred Drake3d9091e2001-03-26 15:49:24 +0000890 """
Fred Drake484d7352000-10-02 21:14:52 +0000891
Fred Drake90eac282001-02-28 05:29:34 +0000892 fp = None # Set here since __del__ checks it
Gregory P. Smith09aa7522013-02-03 00:36:32 -0800893 _windows_illegal_name_trans_table = None
Fred Drake90eac282001-02-28 05:29:34 +0000894
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file is either a path (str), which is opened here, or an existing
    file-like object, which is used as is.  Raises RuntimeError for any
    other mode.  If reading the archive contents fails, a file opened
    here is closed again before the exception propagates.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self._comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = io.open(file, modeDict[mode])
        except OSError:
            # Appending to a file that cannot be opened for update falls
            # back to creating it; note that self.mode stays 'a'.
            if mode == 'a':
                mode = key = 'w'
                self.fp = io.open(file, modeDict[mode])
            else:
                raise
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    try:
        if key == 'r':
            self._RealGetContents()
        elif key == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        elif key == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')
    except:
        # Deliberately catches everything: drop the reference, close the
        # file only if it was opened here, then re-raise unchanged.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000959
def __enter__(self):
    """Enter the context manager, returning this ZipFile itself."""
    return self
962
def __exit__(self, type, value, traceback):
    """Leave the context manager by calling close().

    Nothing is returned, so an exception raised in the with-block is
    not suppressed.
    """
    self.close()
965
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record, then parses every
    central directory entry into a ZipInfo that is appended to
    self.filelist and indexed in self.NameToInfo.  Also sets
    self._comment and self.start_dir.

    Raises BadZipFile if the file has no usable end record, if the
    computed directory offset is negative, or if a directory entry is
    truncated or has a bad signature.  Raises NotImplementedError for
    entries that need a newer ZIP version than supported.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    if self.start_dir < 0:
        # Inconsistent sizes in the end record; report it as a corrupt
        # archive instead of letting seek() fail with an unrelated error.
        raise BadZipFile("Bad offset for central directory")
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # From here on parse the directory from memory.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if len(centdir) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                        t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        # Shift by the amount of data that precedes the archive.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001042
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001043
def namelist(self):
    """Return the names of all archive members as a new list."""
    names = []
    for info in self.filelist:
        names.append(info.filename)
    return names
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001047
def infolist(self):
    """Return a list of class ZipInfo instances for files in the
    archive.

    This is the internal self.filelist object itself, not a copy.
    """
    return self.filelist
1052
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header row plus one row per member (name, modification time,
    uncompressed size) is written to file via print().
    """
    out = file
    print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"), file=out)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = (entry.filename, stamp, entry.file_size)
        print("%-46s %s %12d" % row, file=out)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001061
1062 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +00001063 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001064 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001065 for zinfo in self.filelist:
1066 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +00001067 # Read by chunks, to avoid an OverflowError or a
1068 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +01001069 with self.open(zinfo.filename, "r") as f:
1070 while f.read(chunk_size): # Check CRC-32
1071 pass
Georg Brandl4d540882010-10-28 06:42:33 +00001072 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001073 return zinfo.filename
1074
def getinfo(self, name):
    """Return the ZipInfo registered under 'name'.

    Raises KeyError if the archive has no member with that name.
    """
    info = self.NameToInfo.get(name)
    if info is not None:
        return info
    raise KeyError('There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001083
def setpassword(self, pwd):
    """Set the default password used for encrypted members.

    A false value (None or b'') clears the password.  Raises TypeError
    for a non-empty value that is not bytes.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +00001092
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    """Replace the archive comment and mark the archive as modified.

    comment must be bytes (TypeError otherwise).  A comment longer than
    ZIP_MAX_COMMENT bytes is truncated to that length; a notice is
    printed when self.debug is set.
    """
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment))
    # check for valid comment length
    # A comment of exactly ZIP_MAX_COMMENT bytes fits and is stored
    # unchanged, so only strictly longer ones count as "too long".
    if len(comment) > ZIP_MAX_COMMENT:
        if self.debug:
            print('Archive comment is too long; truncating to %d bytes'
                  % ZIP_MAX_COMMENT)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    self._didModify = True
1110
def read(self, name, pwd=None):
    """Return the complete contents of member 'name' as bytes.

    pwd is forwarded to open() for encrypted members.
    """
    with self.open(name, "r", pwd) as member:
        data = member.read()
    return data
Guido van Rossumd8faa362007-04-27 19:54:29 +00001115
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name is a member name or a ZipInfo.  mode must be "r", "U" or "rU"
    ("U" emits a DeprecationWarning).  pwd, if given, must be bytes and
    overrides the archive's default password for this member.

    Raises RuntimeError for a bad mode, a closed archive, a missing
    password or a failed password check; TypeError for a non-bytes pwd;
    BadZipFile for a truncated/invalid local header or a name mismatch;
    NotImplementedError for patched data or strong encryption.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if 'U' in mode:
        import warnings
        warnings.warn("'U' mode is deprecated",
                      DeprecationWarning, 2)
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, fheader)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            # The extra field is read only to move past it.
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError("compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        # The local header must name the same file as the directory entry.
        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, password "
                                   "required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = list(map(zd, header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file time from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd,
                          close_fileobj=not self._filePassed)
    except:
        # Deliberately catches everything: a file opened above must not
        # leak, and the exception is re-raised unchanged.
        if not self._filePassed:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001211
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the path it was written to.

    `member' may be a filename or a ZipInfo object.  The member is
    extracted under its full name below `path', which defaults to the
    current working directory.  pwd is the password for encrypted
    members.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
1225
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() on each in turn.

    `path' is the directory to extract to (extract() falls back to the
    current working directory).  `members' is optional and must be a
    subset of the list returned by namelist(); all members are
    extracted when it is omitted.
    """
    targets = self.namelist() if members is None else members
    for item in targets:
        self.extract(item, path, pwd)
1237
@classmethod
def _sanitize_windows_name(cls, arcname, pathsep):
    """Replace bad characters and remove trailing dots from parts.

    The translation table is built on first use and cached on the class.
    Path components that end up empty are dropped.
    """
    table = cls._windows_illegal_name_trans_table
    if not table:
        illegal = ':<>|"?*'
        table = str.maketrans(illegal, '_' * len(illegal))
        cls._windows_illegal_name_trans_table = table

    cleaned = []
    for part in arcname.translate(table).split(pathsep):
        part = part.rstrip('.')     # remove trailing dots
        if part:                    # skip parts that became empty
            cleaned.append(part)
    return pathsep.join(cleaned)
1252
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' below the directory targetpath.

    Returns the normalised path of the file or directory that was
    written.
    """
    sep = os.path.sep

    # Turn archive separators into the platform's own.
    arcname = member.filename.replace('/', sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, sep)

    # Interpret absolute names as relative: drop the drive letter or UNC
    # prefix, redundant separators and any "." / ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    skipped = ('', os.path.curdir, os.path.pardir)
    kept = [part for part in arcname.split(sep) if part not in skipped]
    arcname = sep.join(kept)
    if sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, sep)

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if member.filename[-1] == '/':
        # A name ending in '/' denotes a directory entry.
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
    else:
        with self.open(member, pwd=pwd) as source:
            with open(targetpath, "wb") as target:
                shutil.copyfileobj(source, target)

    return targetpath
1291
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Prints a notice for a duplicate name when self.debug is set.  Raises
    RuntimeError if the archive is not writable or already closed, and
    LargeZipFile if the size or header offset would need ZIP64 while
    ZIP64 is not allowed.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("Duplicate name:", zinfo.filename)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)
    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile(
            "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001310
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    filename is the path of an existing file or directory; its mode,
    size and modification time are taken from os.stat().  arcname
    defaults to filename; any drive and leading path separators are
    removed and a trailing '/' is appended for directories.
    compress_type overrides the archive's default compression for
    this member when it is not None.

    Raises RuntimeError if the archive is already closed (or, via
    _writecheck(), not opened for writing), or if the file grows past
    ZIP64_LIMIT while being written although no ZIP64 header was
    reserved for it.  Raises ValueError if nothing is left of the
    archive name once the drive and leading separators are removed
    (for example when asked to add the filesystem root).
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    requested_name = arcname
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    # Guard the index: a name made only of separators ("/") is consumed
    # entirely by this loop and used to fail with a bare IndexError.
    while arcname and arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if not arcname:
        raise ValueError(
            "Cannot derive an archive name from %r: nothing is left after "
            "removing the drive and leading path separators"
            % (requested_name,))
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st.st_mode & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        zinfo.flag_bits |= 0x02

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # Directories are stored as empty members: header only, no data.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        return

    cmpr = _get_compressor(zinfo.compress_type)
    with open(filename, "rb") as fp:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
            zinfo.file_size * 1.05 > ZIP64_LIMIT
        self.fp.write(zinfo.FileHeader(zip64))
        file_size = 0
        while True:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size += len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size += len(buf)
            self.fp.write(buf)
    if cmpr:
        buf = cmpr.flush()
        compress_size += len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    if not zip64 and self._allowZip64:
        # The header written above has no room for 64-bit sizes.
        if file_size > ZIP64_LIMIT:
            raise RuntimeError('File size has increased during compressing')
        if compress_size > ZIP64_LIMIT:
            raise RuntimeError('Compressed size larger than uncompressed size')
    # Seek backwards and write file header (which will now include
    # correct CRC and file sizes)
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset, 0)
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1398
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Store *data* as a member of the archive.

    data may be a 'bytes' or a 'str' instance; a 'str' is encoded as
    UTF-8 before it is stored.  zinfo_or_arcname is either a ready
    ZipInfo instance or the member name, in which case a ZipInfo is
    created with the current local time, the archive's default
    compression and permission bits 0o600.  A compress_type other
    than None overrides the compression recorded in the ZipInfo.

    Raises RuntimeError if the archive is already closed and
    LargeZipFile if the member needs ZIP64 extensions that are not
    allowed.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")

    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        now = time.localtime(time.time())[:6]
        zinfo = ZipInfo(filename=zinfo_or_arcname, date_time=now)
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)             # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header data
    if compress_type is not None:
        zinfo.compress_type = compress_type
    if zinfo.compress_type == ZIP_LZMA:
        # Compressed data includes an end-of-stream (EOS) marker
        zinfo.flag_bits |= 0x02

    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum

    compressor = _get_compressor(zinfo.compress_type)
    if compressor:
        data = compressor.compress(data) + compressor.flush()
        zinfo.compress_size = len(data)     # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size

    needs_zip64 = (zinfo.file_size > ZIP64_LIMIT or
                   zinfo.compress_size > ZIP64_LIMIT)
    if needs_zip64 and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")

    self.fp.write(zinfo.FileHeader(needs_zip64))
    self.fp.write(data)
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        descriptor_fmt = '<LQQ' if needs_zip64 else '<LLL'
        descriptor = struct.pack(descriptor_fmt, zinfo.CRC,
                                 zinfo.compress_size, zinfo.file_size)
        self.fp.write(descriptor)
    self.fp.flush()

    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1450
1451 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001452 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001453 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001454
def close(self):
    """Close the archive, writing the ending records when needed.

    Does nothing when the archive is already closed.  When the archive
    was opened in mode "w" or "a" and has been modified, one central
    directory record per member is written, followed by the ZIP64
    end-of-archive records (only when the member count, directory size
    or directory offset requires them), the end-of-archive record and
    the archive comment.  In every case self.fp is reset to None, and
    the underlying file object is closed unless it was passed in by
    the caller (self._filePassed).
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:
            # write ending records
            entry_count = 0
            dir_start = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                entry_count += 1
                stamp = zinfo.date_time
                dosdate = (stamp[0] - 1980) << 9 | stamp[1] << 5 | stamp[2]
                dostime = stamp[3] << 11 | stamp[4] << 5 | (stamp[5] // 2)

                # Values that do not fit in 32 bits move into a ZIP64
                # extra field and are replaced by 0xffffffff markers.
                zip64_values = []
                if (zinfo.file_size > ZIP64_LIMIT
                        or zinfo.compress_size > ZIP64_LIMIT):
                    zip64_values.append(zinfo.file_size)
                    zip64_values.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    zip64_values.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                min_version = 0
                if zip64_values:
                    # Append a ZIP64 field to the extra's
                    field_count = len(zip64_values)
                    zip64_field = struct.pack(
                        '<HH' + 'Q'*field_count,
                        1, 8*field_count, *zip64_values)
                    extra_data = zip64_field + extra_data

                    min_version = ZIP64_VERSION

                if zinfo.compress_type == ZIP_BZIP2:
                    min_version = max(BZIP2_VERSION, min_version)
                elif zinfo.compress_type == ZIP_LZMA:
                    min_version = max(LZMA_VERSION, min_version)

                extract_version = max(min_version, zinfo.extract_version)
                create_version = max(min_version, zinfo.create_version)
                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(
                        structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the values that were being packed, then re-raise.
                    print((structCentralDir, stringCentralDir, create_version,
                           zinfo.create_system, extract_version, zinfo.reserved,
                           zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                           zinfo.CRC, compress_size, file_size,
                           len(zinfo.filename), len(extra_data), len(zinfo.comment),
                           0, zinfo.internal_attr, zinfo.external_attr,
                           header_offset), file=sys.stderr)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            dir_end = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = entry_count
            centDirSize = dir_end - dir_start
            centDirOffset = dir_start
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, dir_end, 1)
                self.fp.write(zip64locrec)
                # The classic record below only has room for clamped values.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset,
                                 len(self._comment))
            self.fp.write(endrec)
            self.fp.write(self._comment)
            self.fp.flush()
    finally:
        # Mark the archive closed even if writing the records failed.
        underlying = self.fp
        self.fp = None
        if not self._filePassed:
            underlying.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001562
1563
class PyZipFile(ZipFile):
    """ZipFile subclass for archiving Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=True, optimize=-1):
        """Open the archive like ZipFile and remember the optimize level.

        optimize is -1 for the legacy behaviour (use whatever compiled
        file is present), otherwise the optimization level handed to
        py_compile when a module has to be compiled.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename="", filterfunc=None):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory (it contains __init__.py),
        the package and all of its package subdirectories are searched
        recursively for *.py files and the modules are entered below
        basename.  If pathname is a plain directory, only the *.py
        files directly inside it are entered.  Otherwise pathname must
        be a *.py file, and that single module is entered.  Modules
        are compiled when necessary (see _get_codename).

        filterfunc, when given, is called with each path that is about
        to be added; the file or directory is skipped when it returns
        a false value.
        """
        def rejected(path):
            # True when filterfunc vetoes a single source file.
            if filterfunc and not filterfunc(path):
                if self.debug:
                    print('file "%s" skipped by filterfunc' % path)
                return True
            return False

        def add_module(source, prefix):
            # Resolve the (compiled) file for a *.py path and store it.
            fname, arcname = self._get_codename(source[0:-3], prefix)
            if self.debug:
                print("Adding", arcname)
            self.write(fname, arcname)

        if filterfunc and not filterfunc(pathname):
            if self.debug:
                label = 'path' if os.path.isdir(pathname) else 'file'
                print('%s "%s" skipped by filterfunc' % (label, pathname))
            return

        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                if os.path.splitext(filename)[1] != ".py":
                    continue
                if rejected(path):
                    continue
                add_module(path, basename)
            return

        # This is a package directory, add it
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        add_module(initname, basename)

        dirlist = os.listdir(pathname)
        dirlist.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            ext = os.path.splitext(filename)[1]
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename,
                                 filterfunc=filterfunc)  # Recursive call
            elif ext == ".py":
                if rejected(path):
                    continue
                add_module(path, basename)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        pathname is a module path without its ".py" suffix.  The result
        names the file to read from disk and the name to store it under
        (prefixed with basename when that is not empty), compiling the
        source first if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).
        If compilation fails, the ".py" source itself is returned.
        """
        def _compile(file, optimize=-1):
            # Byte-compile *file*; report failure instead of raising.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = importlib.util.cache_from_source(file_py, True)
        pycache_pyo = importlib.util.cache_from_source(file_py, False)

        def _up_to_date(compiled):
            # A compiled file is usable when it exists and is not older
            # than the source.
            return (os.path.isfile(compiled) and
                    os.stat(compiled).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present.  Each entry is
            # (file on disk, name to derive the archive name from); the
            # __pycache__ files are stored under the legacy pyc/pyo name.
            candidates = (
                (file_pyo, file_pyo),
                (file_pyc, file_pyc),
                (pycache_pyc, file_pyc),
                (pycache_pyo, file_pyo),
            )
            for fname, arcname in candidates:
                if _up_to_date(fname):
                    break
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    fname = (pycache_pyc if __debug__ else pycache_pyo)
                    arcname = (file_pyc if __debug__ else file_pyo)
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname, arcname = pycache_pyc, file_pyc
            else:
                fname, arcname = pycache_pyo, file_pyo
            if not _up_to_date(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py

        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001722
1723
def main(args=None):
    """Run the simple command-line interface of this module.

    args is the list of command-line arguments (defaults to
    sys.argv[1:]).  The first argument selects the action: -l lists an
    archive, -t tests it, -e extracts it into a target directory and
    -c creates an archive from the given sources.  An unknown action
    or a wrong number of arguments prints the usage text and exits
    with status 1.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        # Let the archive do the extraction instead of joining member
        # names onto the target by hand: the manual loop tried to open
        # directory entries (names ending in '/') as files and wrote
        # members named with '..' or absolute paths outside the target.
        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; anything else
            # (and an empty directory itself) is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w') as zf:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))


if __name__ == "__main__":
    main()