blob: b223b4a4c190cbe2e5d7a139a69123685b557829 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import imp
10import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
# zlib may be unavailable: use its CRC-32 when the import succeeds and fall
# back to the binascii implementation otherwise.
try:
    import zlib  # We may need its compression method
except ImportError:
    zlib = None
    crc32 = binascii.crc32
else:
    crc32 = zlib.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024
# Public names exported by a star-import of this module.
__all__ = ["BadZipFile", "BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000027
class BadZipFile(Exception):
    """Raised by this module for ZIP data it cannot process.

    Examples in this file: archives spanning multiple disks and members
    whose CRC-32 does not match the recorded value.
    """
Thomas Wouters0e3f5912006-08-11 14:57:12 +000030
31
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that requires ZIP64 extensions
    while those extensions are disabled.
    """
37
# Pre-3.2 compatibility names; both are plain aliases of BadZipFile.
error = BadZipfile = BadZipFile
39
Guido van Rossum32abe6f2000-03-31 17:30:02 +000040
# Sizes above ZIP64_LIMIT are written using the ZIP64 extension
# (see ZipInfo.FileHeader).
ZIP64_LIMIT = 0x7FFFFFFF            # (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 0x10000       # 1 << 16
ZIP_MAX_COMMENT = 0xFFFF            # (1 << 16) - 1
Thomas Wouters0e3f5912006-08-11 14:57:12 +000044
# constants for Zip file compression methods
ZIP_STORED = 0      # no compression
ZIP_DEFLATED = 8    # deflate; requires zlib
# Other ZIP compression methods not supported
49
Martin v. Löwisb09b8442008-07-03 14:13:42 +000050# Below are some formats and associated data for reading/writing headers using
51# the struct module. The names and structures of headers/records are those used
52# in the PKWARE description of the ZIP file format:
53# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
54# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000055
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.Struct(structEndArchive).size

# Positions of the fields in an unpacked "end of central directory" record.
# The last two (_ECD_COMMENT and _ECD_LOCATION) are not part of the
# structure as defined in the spec, but they are used internally by this
# module as a convenience.
(
    _ECD_SIGNATURE,
    _ECD_DISK_NUMBER,
    _ECD_DISK_START,
    _ECD_ENTRIES_THIS_DISK,
    _ECD_ENTRIES_TOTAL,
    _ECD_SIZE,
    _ECD_OFFSET,
    _ECD_COMMENT_SIZE,
    _ECD_COMMENT,
    _ECD_LOCATION,
) = range(10)
74
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.Struct(structCentralDir).size

# indexes of entries in the central directory structure
(
    _CD_SIGNATURE,
    _CD_CREATE_VERSION,
    _CD_CREATE_SYSTEM,
    _CD_EXTRACT_VERSION,
    _CD_EXTRACT_SYSTEM,
    _CD_FLAG_BITS,
    _CD_COMPRESS_TYPE,
    _CD_TIME,
    _CD_DATE,
    _CD_CRC,
    _CD_COMPRESSED_SIZE,
    _CD_UNCOMPRESSED_SIZE,
    _CD_FILENAME_LENGTH,
    _CD_EXTRA_FIELD_LENGTH,
    _CD_COMMENT_LENGTH,
    _CD_DISK_NUMBER_START,
    _CD_INTERNAL_FILE_ATTRIBUTES,
    _CD_EXTERNAL_FILE_ATTRIBUTES,
    _CD_LOCAL_HEADER_OFFSET,
) = range(19)
101
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.Struct(structFileHeader).size

# Positions of the fields in an unpacked local file header.
(
    _FH_SIGNATURE,
    _FH_EXTRACT_VERSION,
    _FH_EXTRACT_SYSTEM,
    _FH_GENERAL_PURPOSE_FLAG_BITS,
    _FH_COMPRESSION_METHOD,
    _FH_LAST_MOD_TIME,
    _FH_LAST_MOD_DATE,
    _FH_CRC,
    _FH_COMPRESSED_SIZE,
    _FH_UNCOMPRESSED_SIZE,
    _FH_FILENAME_LENGTH,
    _FH_EXTRA_FIELD_LENGTH,
) = range(12)
120
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.Struct(structEndArchive64Locator).size
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000125
# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.Struct(structEndArchive64).size

# Positions of the fields in an unpacked ZIP64 end-of-central-directory record.
(
    _CD64_SIGNATURE,
    _CD64_DIRECTORY_RECSIZE,
    _CD64_CREATE_VERSION,
    _CD64_EXTRACT_VERSION,
    _CD64_DISK_NUMBER,
    _CD64_DISK_NUMBER_START,
    _CD64_NUMBER_ENTRIES_THIS_DISK,
    _CD64_NUMBER_ENTRIES_TOTAL,
    _CD64_DIRECTORY_SIZE,
    _CD64_OFFSET_START_CENTDIR,
) = range(10)
142
def _check_zipfile(fp):
    """Return True if an "end of central directory" record is found in fp.

    An IOError raised while probing fp is treated as "not a ZIP file".
    """
    try:
        endrec = _EndRecData(fp)
    except IOError:
        return False
    # A truthy record means the file has the correct magic number.
    return bool(endrec)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000150
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too: anything
    with a "read" attribute is probed directly, anything else is opened
    in binary mode as a path.  An IOError while opening or probing yields
    False.
    """
    looks_like_zip = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as stream:
                looks_like_zip = _check_zipfile(stream)
        else:
            looks_like_zip = _check_zipfile(fp=filename)
    except IOError:
        pass
    return looks_like_zip
166
def _EndRecData64(fpin, offset, endrec):
    """Merge the ZIP64 end-of-archive records into endrec when present.

    fpin is a seekable binary file object, offset is the position of the
    classic "end of central directory" record relative to the end of the
    file (it is used with whence=2), and endrec is the list built from
    that record.

    Returns endrec: updated in place when both the ZIP64 locator and the
    ZIP64 record are found, untouched otherwise.  Raises BadZipFile when
    the locator describes an archive spanning multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, reloff, disks = struct.unpack(structEndArchive64Locator,
                                                 locator)
    if magic != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks == 1):
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data': the record sits right before the
    # locator.
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    fields = struct.unpack(structEndArchive64, record)
    if fields[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    for ecd_index, cd64_index in (
            (_ECD_SIGNATURE, _CD64_SIGNATURE),
            (_ECD_DISK_NUMBER, _CD64_DISK_NUMBER),
            (_ECD_DISK_START, _CD64_DISK_NUMBER_START),
            (_ECD_ENTRIES_THIS_DISK, _CD64_NUMBER_ENTRIES_THIS_DISK),
            (_ECD_ENTRIES_TOTAL, _CD64_NUMBER_ENTRIES_TOTAL),
            (_ECD_SIZE, _CD64_DIRECTORY_SIZE),
            (_ECD_OFFSET, _CD64_OFFSET_START_CENTDIR)):
        endrec[ecd_index] = fields[cd64_index]
    return endrec
208
209
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list holding the unpacked fields of the ZIP "End of
    central dir" record, followed by the archive comment and, as the last
    item, the file seek offset of this record.  The list is passed through
    _EndRecData64() so ZIP64 values override the classic ones when present.
    """
    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # First try the cheap case: a ZIP file with no archive comment, where
    # the "end of central directory" structure is the last item in the file.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    tail = fpin.read()
    if (len(tail) == sizeEndCentDir
            and tail[:4] == stringEndArchive
            and tail[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, tail))

        # Append a blank comment and record start offset
        endrec.extend((b"", filesize - sizeEndCentDir))

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    search_start = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(search_start, 0)
    tail = fpin.read()
    sig_pos = tail.rfind(stringEndArchive)
    if sig_pos < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    rec_end = sig_pos + sizeEndCentDir
    raw_record = tail[sig_pos:rec_end]
    if len(raw_record) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, raw_record))
    comment_len = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    endrec.append(tail[rec_end:rec_end + comment_len])
    endrec.append(search_start + sig_pos)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, search_start + sig_pos - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000269
Fred Drake484d7352000-10-02 21:14:52 +0000270
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename is the member name; it is cut at the first null byte and
        os.sep is replaced by "/".  date_time is a
        (year, month, day, hour, min, sec) sequence.

        Raises ValueError if the year in date_time is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self, zip64=None):
        """Return the per-file header as bytes.

        zip64 selects whether the ZIP64 extra record is appended:
        None decides from the sizes, a true value always appends it, and a
        false value forbids it.

        When a size exceeds ZIP64_LIMIT the header stores 0xffffffff for
        both sizes and extract_version/create_version are raised to at
        least 45 on this object.

        Raises LargeZipFile if a size exceeds ZIP64_LIMIT while zip64 is
        false.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value, so a higher
            # create_version is never lowered to extract_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        The name is encoded as ASCII when possible; otherwise it is encoded
        as UTF-8 and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply ZIP64 records (type 1) found in self.extra to this object.

        file_size, compress_size and header_offset are replaced, in that
        order, when they currently hold the 0xffffffff placeholder.

        Raises RuntimeError if a ZIP64 record has an unsupported length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header.  Stop when
        # fewer than 4 bytes remain so trailing padding does not make the
        # header unpack fail.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000418
419
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for value in range(256):
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= poly
            table.append(value)
        return table
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        table_entry = self.crctable[(crc ^ ch) & 0xff]
        return ((crc >> 8) & 0xffffff) ^ table_entry

    def __init__(self, pwd):
        # Fixed initial key values, then mixed with every password byte.
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for byte in pwd:
            self._UpdateKeys(byte)

    def _UpdateKeys(self, c):
        """Advance the three keys with the plaintext byte c."""
        self.key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (self.key0 & 0xFF)) & 0xFFFFFFFF
        self.key1 = (key1 * 0x08088405 + 1) & 0xFFFFFFFF
        self.key2 = self._crc32((self.key1 >> 24) & 0xFF, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        plain = c ^ (((k * (k ^ 1)) >> 8) & 0xFF)
        self._UpdateKeys(plain)
        return plain
477
Ezio Melotti6a5fc4c2012-11-18 13:20:36 +0200478
# Human-readable names for ZIP compression method ids.  ZipExtFile looks a
# method up here to describe unsupported compression types in the
# NotImplementedError it raises.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
498
499
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000500class ZipExtFile(io.BufferedIOBase):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000501 """File-like object for reading an archive member.
502 Is returned by ZipFile.open().
503 """
504
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000505 # Max size supported by decompressor.
506 MAX_N = 1 << 31 - 1
Guido van Rossumd8faa362007-04-27 19:54:29 +0000507
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000508 # Read from compressed files in 4k blocks.
509 MIN_READ_SIZE = 4096
Guido van Rossumd8faa362007-04-27 19:54:29 +0000510
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000511 # Search for universal newlines or line chunks.
512 PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
513
Łukasz Langae94980a2010-11-22 23:31:26 +0000514 def __init__(self, fileobj, mode, zipinfo, decrypter=None,
515 close_fileobj=False):
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000516 self._fileobj = fileobj
517 self._decrypter = decrypter
Łukasz Langae94980a2010-11-22 23:31:26 +0000518 self._close_fileobj = close_fileobj
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000519
Ezio Melotti92b47432010-01-28 01:44:41 +0000520 self._compress_type = zipinfo.compress_type
521 self._compress_size = zipinfo.compress_size
522 self._compress_left = zipinfo.compress_size
523
524 if self._compress_type == ZIP_DEFLATED:
525 self._decompressor = zlib.decompressobj(-15)
Ezio Melotti6a5fc4c2012-11-18 13:20:36 +0200526 elif self._compress_type != ZIP_STORED:
527 descr = compressor_names.get(self._compress_type)
528 if descr:
529 raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr))
530 else:
531 raise NotImplementedError("compression type %d" % (self._compress_type,))
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000532 self._unconsumed = b''
533
534 self._readbuffer = b''
535 self._offset = 0
536
537 self._universal = 'U' in mode
538 self.newlines = None
539
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000540 # Adjust read size for encrypted files since the first 12 bytes
541 # are for the encryption/password information.
542 if self._decrypter is not None:
543 self._compress_left -= 12
544
545 self.mode = mode
Guido van Rossumd8faa362007-04-27 19:54:29 +0000546 self.name = zipinfo.filename
547
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000548 if hasattr(zipinfo, 'CRC'):
549 self._expected_crc = zipinfo.CRC
550 self._running_crc = crc32(b'') & 0xffffffff
551 else:
552 self._expected_crc = None
553
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000554 def readline(self, limit=-1):
555 """Read and return a line from the stream.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000556
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000557 If limit is specified, at most limit bytes will be read.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000558 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000559
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000560 if not self._universal and limit < 0:
561 # Shortcut common case - newline found in buffer.
562 i = self._readbuffer.find(b'\n', self._offset) + 1
563 if i > 0:
564 line = self._readbuffer[self._offset: i]
565 self._offset = i
566 return line
Guido van Rossumd8faa362007-04-27 19:54:29 +0000567
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000568 if not self._universal:
569 return io.BufferedIOBase.readline(self, limit)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000570
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000571 line = b''
572 while limit < 0 or len(line) < limit:
573 readahead = self.peek(2)
574 if readahead == b'':
575 return line
Guido van Rossumd8faa362007-04-27 19:54:29 +0000576
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000577 #
578 # Search for universal newlines or line chunks.
579 #
580 # The pattern returns either a line chunk or a newline, but not
581 # both. Combined with peek(2), we are assured that the sequence
582 # '\r\n' is always retrieved completely and never split into
583 # separate newlines - '\r', '\n' due to coincidental readaheads.
584 #
585 match = self.PATTERN.search(readahead)
586 newline = match.group('newline')
587 if newline is not None:
588 if self.newlines is None:
589 self.newlines = []
590 if newline not in self.newlines:
591 self.newlines.append(newline)
592 self._offset += len(newline)
593 return line + b'\n'
Guido van Rossumd8faa362007-04-27 19:54:29 +0000594
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000595 chunk = match.group('chunk')
596 if limit >= 0:
597 chunk = chunk[: limit - len(line)]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000598
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000599 self._offset += len(chunk)
600 line += chunk
Guido van Rossumd8faa362007-04-27 19:54:29 +0000601
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000602 return line
603
604 def peek(self, n=1):
605 """Returns buffered bytes without advancing the position."""
606 if n > len(self._readbuffer) - self._offset:
607 chunk = self.read(n)
608 self._offset -= len(chunk)
609
610 # Return up to 512 bytes to reduce allocation overhead for tight loops.
611 return self._readbuffer[self._offset: self._offset + 512]
612
613 def readable(self):
614 return True
615
616 def read(self, n=-1):
617 """Read and return up to n bytes.
618 If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
Guido van Rossumd8faa362007-04-27 19:54:29 +0000619 """
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000620 buf = b''
Antoine Pitrou6464d5f2010-09-12 14:51:20 +0000621 if n is None:
622 n = -1
623 while True:
624 if n < 0:
625 data = self.read1(n)
626 elif n > len(buf):
627 data = self.read1(n - len(buf))
628 else:
629 return buf
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000630 if len(data) == 0:
631 return buf
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000632 buf += data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000633
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000634 def _update_crc(self, newdata, eof):
635 # Update the CRC using the given data.
636 if self._expected_crc is None:
637 # No need to compute the CRC if we don't have a reference value
638 return
639 self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
640 # Check the CRC if we're at the end of the file
641 if eof and self._running_crc != self._expected_crc:
Georg Brandl4d540882010-10-28 06:42:33 +0000642 raise BadZipFile("Bad CRC-32 for file %r" % self.name)
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000643
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000644 def read1(self, n):
645 """Read up to n bytes with at most one read() system call."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000646
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000647 # Simplify algorithm (branching) by transforming negative n to large n.
648 if n < 0 or n is None:
649 n = self.MAX_N
Guido van Rossumd8faa362007-04-27 19:54:29 +0000650
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000651 # Bytes available in read buffer.
652 len_readbuffer = len(self._readbuffer) - self._offset
Guido van Rossumd8faa362007-04-27 19:54:29 +0000653
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000654 # Read from file.
655 if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
656 nbytes = n - len_readbuffer - len(self._unconsumed)
657 nbytes = max(nbytes, self.MIN_READ_SIZE)
658 nbytes = min(nbytes, self._compress_left)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000659
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000660 data = self._fileobj.read(nbytes)
661 self._compress_left -= len(data)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000662
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000663 if data and self._decrypter is not None:
664 data = bytes(map(self._decrypter, data))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000665
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000666 if self._compress_type == ZIP_STORED:
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000667 self._update_crc(data, eof=(self._compress_left==0))
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000668 self._readbuffer = self._readbuffer[self._offset:] + data
669 self._offset = 0
670 else:
671 # Prepare deflated bytes for decompression.
672 self._unconsumed += data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000673
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000674 # Handle unconsumed data.
Ezio Melotti92b47432010-01-28 01:44:41 +0000675 if (len(self._unconsumed) > 0 and n > len_readbuffer and
676 self._compress_type == ZIP_DEFLATED):
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000677 data = self._decompressor.decompress(
678 self._unconsumed,
679 max(n - len_readbuffer, self.MIN_READ_SIZE)
680 )
Guido van Rossumd8faa362007-04-27 19:54:29 +0000681
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000682 self._unconsumed = self._decompressor.unconsumed_tail
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000683 eof = len(self._unconsumed) == 0 and self._compress_left == 0
684 if eof:
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000685 data += self._decompressor.flush()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000686
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000687 self._update_crc(data, eof=eof)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000688 self._readbuffer = self._readbuffer[self._offset:] + data
689 self._offset = 0
690
691 # Read from buffer.
692 data = self._readbuffer[self._offset: self._offset + n]
693 self._offset += len(data)
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000694 return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000695
def close(self):
    """Close this member stream.

    The underlying file object is closed too, but only when
    ``_close_fileobj`` is true.  The base-class ``close()`` always runs,
    even when closing the underlying file raises.
    """
    try:
        if not self._close_fileobj:
            return
        self._fileobj.close()
    finally:
        super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000702
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000703
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000704class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000705 """ Class with methods to open, read, write, close, list zip files.
706
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000707 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000708
Fred Drake3d9091e2001-03-26 15:49:24 +0000709 file: Either the path to the file, or a file-like object.
710 If it is a path, the file will be opened and closed by ZipFile.
711 mode: The mode can be either read "r", write "w" or append "a".
712 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000713 allowZip64: if True ZipFile will create files with ZIP64 extensions when
714 needed, otherwise it will raise an exception when this would
715 be necessary.
716
Fred Drake3d9091e2001-03-26 15:49:24 +0000717 """
Fred Drake484d7352000-10-02 21:14:52 +0000718
Fred Drake90eac282001-02-28 05:29:34 +0000719 fp = None # Set here since __del__ checks it
720
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file may be a path (str), in which case the file is opened here and
    owned by this object, or an already open file-like object.
    Raises RuntimeError for an unknown mode or compression method, or
    when ZIP_DEFLATED is requested but zlib is unavailable.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression != ZIP_STORED:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                  # Level of printing: 0 through 3
    self.NameToInfo = {}            # Find file info given name
    self.filelist = []              # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self._comment = b''

    if isinstance(file, str):
        # A filename: this object opens (and later closes) the file.
        self._filePassed = 0
        self.filename = file
        open_modes = {'r': 'rb', 'w': 'wb', 'a': 'r+b'}
        try:
            self.fp = io.open(file, open_modes[mode])
        except IOError:
            if mode != 'a':
                raise
            # Appending and the first open failed: fall back to
            # opening the file for writing instead.
            mode = key = 'w'
            self.fp = io.open(file, open_modes[mode])
    else:
        # A file-like object supplied by the caller.
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    try:
        if key == 'r':
            self._RealGetContents()
        elif key == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        elif key == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
        else:
            # mode was validated on entry, so this is only a safety net
            raise RuntimeError('Mode must be "r", "w" or "a"')
    except:
        # Drop our reference and close the file only if we opened it,
        # then let the original exception propagate.
        opened = self.fp
        self.fp = None
        if not self._filePassed:
            opened.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000792
Ezio Melottifaa6b7f2009-12-30 12:34:59 +0000793 def __enter__(self):
794 return self
795
796 def __exit__(self, type, value, traceback):
797 self.close()
798
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record, then parses every
    central directory entry into a ZipInfo that is appended to
    self.filelist and indexed by name in self.NameToInfo.  Also sets
    self._comment and self.start_dir.

    Raises BadZipFile when no end record is found, when the computed
    central directory offset is negative, or when a directory entry is
    truncated or carries the wrong signature.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except IOError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    if self.start_dir < 0:
        # Inconsistent sizes/offsets in the end record would otherwise
        # surface as a low-level error from seek().
        raise BadZipFile("Bad offset for central directory")
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory rather than from the file.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if len(centdir) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        x._decodeExtra()
        # Shift by the size of any data preceding the archive.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000872
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000873
def namelist(self):
    """Return a list of file names in the archive.

    A new list is built on every call, in the order of self.filelist.
    """
    return [entry.filename for entry in self.filelist]
880
def infolist(self):
    """Return the list of ZipInfo instances for files in the archive.

    This is the internal self.filelist object itself, not a copy.
    """
    return self.filelist
885
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One heading line is written, then one line per archive member with
    its name, modification time and uncompressed size.  Output goes to
    'file', which is passed straight through to print().
    """
    heading = ("File Name", "Modified ", "Size")
    print("%-46s %19s %12s" % heading, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = (entry.filename, stamp, entry.file_size)
        print("%-46s %s %12d" % row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000894
895 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000896 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000897 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000898 for zinfo in self.filelist:
899 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000900 # Read by chunks, to avoid an OverflowError or a
901 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +0100902 with self.open(zinfo.filename, "r") as f:
903 while f.read(chunk_size): # Check CRC-32
904 pass
Georg Brandl4d540882010-10-28 06:42:33 +0000905 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000906 return zinfo.filename
907
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when the archive has no member with that name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000916
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A false value (None or empty bytes) clears the default password.
    Raises TypeError when a non-empty pwd is not a bytes object.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000925
@property
def comment(self):
    """The comment text associated with the ZIP file.

    Assigning requires a bytes object (TypeError otherwise).  A value
    longer than ZIP_MAX_COMMENT bytes is truncated to that length, with
    a notice printed when self.debug is set.  Any assignment marks the
    archive as modified.
    """
    return self._comment

@comment.setter
def comment(self, comment):
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment))
    # check for valid comment length; exactly ZIP_MAX_COMMENT bytes is
    # still allowed, only longer comments are truncated
    if len(comment) > ZIP_MAX_COMMENT:
        if self.debug:
            print('Archive comment is too long; truncating to %d bytes'
                  % ZIP_MAX_COMMENT)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    # Record the change so the archive is treated as modified.
    self._didModify = True
943
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    The member is opened in mode "r" with the given password, read in
    full, and closed again before returning.
    """
    with self.open(name, "r", pwd) as member:
        contents = member.read()
    return contents
Guido van Rossumd8faa362007-04-27 19:54:29 +0000948
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name -- a member name, or a ZipInfo object describing the member.
    mode -- must be "r", "U" or "rU".
    pwd  -- password (bytes) for an encrypted member; when omitted the
            default stored in self.pwd is used.

    Raises RuntimeError for an unsupported mode, a closed archive, a
    missing password or a wrong password; TypeError when pwd is given
    but is not bytes; BadZipFile when the local file header or the
    encryption header is truncated, or the header disagrees with the
    central directory.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, fheader)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, password "
                                   "required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            if len(header) != 12:
                # A short read would otherwise fail below with an
                # IndexError on h[11].
                raise BadZipFile("Truncated encryption header")
            h = list(map(zd, header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd,
                          close_fileobj=not self._filePassed)
    except:
        # Do not leak the file handle we opened ourselves.
        if not self._filePassed:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001032
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
    using its full name. Its file information is extracted as accurately
    as possible. `member' may be a filename or a ZipInfo object. You can
    specify a different directory using `path'.

    Returns whatever _extract_member() returns for the member.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
1046
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
    directory. `path' specifies a different directory to extract to.
    `members' is optional and must be a subset of the list returned
    by namelist().

    Each member is handed to extract() in turn with the same path
    and password.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
1058
1059 def _extract_member(self, member, targetpath, pwd):
1060 """Extract the ZipInfo object 'member' to a physical
1061 file on the path targetpath.
1062 """
1063 # build the destination pathname, replacing
1064 # forward slashes to platform specific separators.
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001065 arcname = member.filename.replace('/', os.path.sep)
Christian Heimes790c8232008-01-07 21:14:23 +00001066
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001067 if os.path.altsep:
1068 arcname = arcname.replace(os.path.altsep, os.path.sep)
1069 # interpret absolute pathname as relative, remove drive letter or
1070 # UNC path, redundant separators, "." and ".." components.
1071 arcname = os.path.splitdrive(arcname)[1]
1072 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1073 if x not in ('', os.path.curdir, os.path.pardir))
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001074 if os.path.sep == '\\':
Serhiy Storchakae5e64442013-02-02 19:50:59 +02001075 # filter illegal characters on Windows
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001076 illegal = ':<>|"?*'
1077 table = str.maketrans(illegal, '_' * len(illegal))
1078 arcname = arcname.translate(table)
Serhiy Storchakae5e64442013-02-02 19:50:59 +02001079 # remove trailing dots
1080 arcname = (x.rstrip('.') for x in arcname.split(os.path.sep))
1081 arcname = os.path.sep.join(x for x in arcname if x)
Christian Heimes790c8232008-01-07 21:14:23 +00001082
Gregory P. Smithb47acbf2013-02-01 11:22:43 -08001083 targetpath = os.path.join(targetpath, arcname)
Christian Heimes790c8232008-01-07 21:14:23 +00001084 targetpath = os.path.normpath(targetpath)
1085
1086 # Create all upper directories if necessary.
1087 upperdirs = os.path.dirname(targetpath)
1088 if upperdirs and not os.path.exists(upperdirs):
1089 os.makedirs(upperdirs)
1090
Martin v. Löwis59e47792009-01-24 14:10:07 +00001091 if member.filename[-1] == '/':
Martin v. Löwis70ccd162009-05-24 19:47:22 +00001092 if not os.path.isdir(targetpath):
1093 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001094 return targetpath
1095
Antoine Pitrou17babc52012-11-17 23:50:08 +01001096 with self.open(member, pwd=pwd) as source, \
1097 open(targetpath, "wb") as target:
1098 shutil.copyfileobj(source, target)
Christian Heimes790c8232008-01-07 21:14:23 +00001099
1100 return targetpath
1101
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001102 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001103 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001104 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +00001105 if self.debug: # Warning for duplicate names
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001106 print("Duplicate name:", zinfo.filename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001107 if self.mode not in ("w", "a"):
Collin Winterce36ad82007-08-30 01:19:48 +00001108 raise RuntimeError('write() requires mode "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001109 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001110 raise RuntimeError(
1111 "Attempt to write ZIP archive that was already closed")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001112 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +00001113 raise RuntimeError(
1114 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001115 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
Collin Winterce36ad82007-08-30 01:19:48 +00001116 raise RuntimeError("That compression method is not supported")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001117 if zinfo.file_size > ZIP64_LIMIT:
1118 if not self._allowZip64:
1119 raise LargeZipFile("Filesize would require ZIP64 extensions")
1120 if zinfo.header_offset > ZIP64_LIMIT:
1121 if not self._allowZip64:
Collin Winterce36ad82007-08-30 01:19:48 +00001122 raise LargeZipFile(
1123 "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001124
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    filename      -- path of the file or directory on disk.
    arcname       -- name inside the archive; defaults to filename with
                     the drive and leading separators removed.
    compress_type -- overrides self.compression for this entry.

    A directory is stored as an empty entry whose name ends in '/'.
    Raises RuntimeError when the archive is closed or when the file
    turns out larger than the header written for it allows.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if isdir:
        # Also set the MS-DOS directory attribute bit so that tools
        # relying on the DOS attributes recognise the entry.
        zinfo.external_attr |= 0x10
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # Directories carry no data: header only.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        return

    with open(filename, "rb") as fp:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
                zinfo.file_size * 1.05 > ZIP64_LIMIT
        self.fp.write(zinfo.FileHeader(zip64))
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None
        file_size = 0
        while True:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    if not zip64 and self._allowZip64:
        # The header was written in non-ZIP64 form; the final sizes
        # must still fit it.
        if file_size > ZIP64_LIMIT:
            raise RuntimeError('File size has increased during compressing')
        if compress_size > ZIP64_LIMIT:
            raise RuntimeError('Compressed size larger than uncompressed size')
    # Seek backwards and write file header (which will now include
    # correct CRC and file sizes)
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset, 0)
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1213
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    'compress_type', when given, overrides the entry's compression
    method.  Raises RuntimeError on a closed archive and LargeZipFile
    when the entry needs ZIP64 but it is not allowed.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")

    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        # Build a fresh entry stamped with the current local time.
        now = time.localtime(time.time())[:6]
        zinfo = ZipInfo(filename=zinfo_or_arcname, date_time=now)
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)             # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header data
    if compress_type is not None:
        zinfo.compress_type = compress_type

    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                      zlib.DEFLATED, -15)
        data = compressor.compress(data) + compressor.flush()
        zinfo.compress_size = len(data)     # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size

    zip64 = (zinfo.file_size > ZIP64_LIMIT
             or zinfo.compress_size > ZIP64_LIMIT)
    if zip64 and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")

    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.write(data)
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        if zip64:
            fmt = '<LQQ'
        else:
            fmt = '<LLL'
        trailer = struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size)
        self.fp.write(trailer)
    self.fp.flush()
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1263
1264 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001265 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001266 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001267
def close(self):
    """Finish the archive and release the underlying file object.

    If the archive was opened in mode "w" or "a" and has been modified,
    the central directory and the end-of-archive record(s) are written
    before the file is released.  Calling close() on an archive whose
    file object is already gone (self.fp is None) does nothing.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:
            # --- central directory: one record per archive member ---
            central_start = self.fp.tell()
            for entry in self.filelist:
                stamp = entry.date_time
                dosdate = (stamp[0] - 1980) << 9 | stamp[1] << 5 | stamp[2]
                dostime = stamp[3] << 11 | stamp[4] << 5 | (stamp[5] // 2)

                # Values that do not fit the 32-bit fields are moved into
                # a ZIP64 extra field and replaced by 0xffffffff markers.
                zip64_values = []
                file_size = entry.file_size
                compress_size = entry.compress_size
                if (entry.file_size > ZIP64_LIMIT
                        or entry.compress_size > ZIP64_LIMIT):
                    zip64_values.append(entry.file_size)
                    zip64_values.append(entry.compress_size)
                    file_size = compress_size = 0xffffffff

                header_offset = entry.header_offset
                if entry.header_offset > ZIP64_LIMIT:
                    zip64_values.append(entry.header_offset)
                    header_offset = 0xffffffff

                extra_data = entry.extra
                extract_version = entry.extract_version
                create_version = entry.create_version
                if zip64_values:
                    # Prepend the ZIP64 field (header id 1) to the
                    # member's own extra data and require version 4.5.
                    zip64_field = struct.pack(
                        '<HH' + 'Q' * len(zip64_values),
                        1, 8 * len(zip64_values), *zip64_values)
                    extra_data = zip64_field + extra_data
                    extract_version = max(45, extract_version)
                    create_version = max(45, create_version)

                try:
                    filename, flag_bits = entry._encodeFilenameFlags()
                    record = (
                        stringCentralDir, create_version,
                        entry.create_system, extract_version, entry.reserved,
                        flag_bits, entry.compress_type, dostime, dosdate,
                        entry.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(entry.comment),
                        0, entry.internal_attr, entry.external_attr,
                        header_offset,
                    )
                    centdir = struct.pack(structCentralDir, *record)
                except DeprecationWarning:
                    # Dump the offending values before propagating.
                    print((structCentralDir, stringCentralDir, create_version,
                           entry.create_system, extract_version, entry.reserved,
                           entry.flag_bits, entry.compress_type, dostime, dosdate,
                           entry.CRC, compress_size, file_size,
                           len(entry.filename), len(extra_data), len(entry.comment),
                           0, entry.internal_attr, entry.external_attr,
                           header_offset), file=sys.stderr)
                    raise

                for chunk in (centdir, filename, extra_data, entry.comment):
                    self.fp.write(chunk)

            # --- end-of-archive record(s) ---
            central_end = self.fp.tell()
            dir_count = len(self.filelist)
            dir_size = central_end - central_start
            dir_offset = central_start

            needs_zip64 = (dir_count >= ZIP_FILECOUNT_LIMIT or
                           dir_offset > ZIP64_LIMIT or
                           dir_size > ZIP64_LIMIT)
            if needs_zip64:
                # ZIP64 end-of-archive record followed by its locator;
                # the classic record below then carries clamped values.
                self.fp.write(struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, dir_count, dir_count,
                    dir_size, dir_offset))
                self.fp.write(struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, central_end, 1))
                dir_count = min(dir_count, 0xFFFF)
                dir_size = min(dir_size, 0xFFFFFFFF)
                dir_offset = min(dir_offset, 0xFFFFFFFF)

            self.fp.write(struct.pack(
                structEndArchive, stringEndArchive,
                0, 0, dir_count, dir_count,
                dir_size, dir_offset, len(self._comment)))
            self.fp.write(self._comment)
            self.fp.flush()
    finally:
        # Always drop our reference; only close the file object when
        # self._filePassed is false.
        handle, self.fp = self.fp, None
        if not self._filePassed:
            handle.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001371
1372
class PyZipFile(ZipFile):
    """ZipFile variant that archives Python modules and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=False, optimize=-1):
        """Open the archive like ZipFile and remember the optimize level.

        optimize is consulted by _get_codename(): -1 selects the legacy
        "use whatever compiled file is present" behaviour, any other
        value is passed on to py_compile when compilation is needed.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename=""):
        """Add the Python code found at "pathname" to the archive.

        Three cases are handled:

        * pathname is a package directory (it contains __init__.py):
          the package, its *.py modules and its sub-packages are added
          recursively below "basename/<package name>".
        * pathname is any other directory: each *.py file directly
          inside it is added below "basename".
        * otherwise pathname must name a *.py file, which is added on
          its own; RuntimeError is raised if it does not end in ".py".

        Each module is stored under the name chosen by _get_codename()
        (normally the compiled .pyc/.pyo file, or the .py source when
        compilation fails).
        """
        name = os.path.split(pathname)[1]

        # Single file.
        if not os.path.isdir(pathname):
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")

        # Plain directory: its modules go in at the current level.
        if not os.path.isfile(initname):
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] != ".py":
                    continue
                path = os.path.join(pathname, filename)
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
            return

        # Package directory: extend the archive prefix with its name.
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)

        # __init__.py has just been written, so leave it out of the scan.
        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        for filename in entries:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                # Only sub-directories that are packages are descended into.
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    self.writepy(path, basename)
                continue
            if os.path.splitext(filename)[1] == ".py":
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the module at "pathname".

        pathname is the module path without its ".py" suffix.  The
        returned filename is the file on disk to store (a compiled file
        when one is usable or can be produced, else the .py source) and
        archivename is its name inside the archive, prefixed with
        "basename/" when basename is non-empty.
        """
        def _compile(file, optimize=-1):
            # Byte-compile "file"; report failure by printing the
            # compiler message and returning False.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            else:
                return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = imp.cache_from_source(file_py, True)
        pycache_pyo = imp.cache_from_source(file_py, False)

        def _is_fresh(candidate):
            # True when "candidate" exists and is at least as new as the source.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # Legacy mode: take the first up-to-date compiled file, in
            # this order.  __pycache__ files are stored under the legacy
            # .pyc/.pyo name in the archive.
            candidates = (
                (file_pyo, file_pyo),
                (file_pyc, file_pyc),
                (pycache_pyc, file_pyc),
                (pycache_pyo, file_pyo),
            )
            for fname, arcname in candidates:
                if _is_fresh(fname):
                    break
            else:
                # Nothing usable: compile into the PEP 3147 location.
                if _compile(file_py):
                    fname = (pycache_pyc if __debug__ else pycache_pyo)
                    arcname = (file_pyc if __debug__ else file_pyo)
                else:
                    fname = arcname = file_py
        else:
            # New mode: honour the requested optimization level.
            if self._optimize == 0:
                fname, arcname = pycache_pyc, file_pyc
            else:
                fname, arcname = pycache_pyo, file_pyo
            if not _is_fresh(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py

        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001515
1516
def main(args=None):
    """Minimal command-line interface to the module.

    args is the argument list without the program name; when it is None,
    sys.argv[1:] is used.  The first argument selects the action:

        -l archive            print a listing of the archive
        -t archive            test the archive and report the first bad member
        -e archive target     extract the archive into directory "target"
        -c archive src ...    create the archive from files/directories

    On a missing/unknown action or a wrong number of arguments the usage
    text is printed and sys.exit(1) is called.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        with ZipFile(args[1], 'r') as zf:
            # Let the library do the extraction.  The previous hand-rolled
            # loop opened every member name with open(..., 'wb'), which
            # fails on explicit directory entries such as "pkg/", and it
            # joined member names onto the target without any checking.
            zf.extractall(args[2])

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Store regular files deflated; recurse into directories.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w', allowZip64=True) as zf:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001586
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()