blob: 32cf42c19870988c280ae88955849e17c8e225af [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import imp
10import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
# zlib is optional.  When it is importable its crc32 is used; otherwise the
# module falls back to binascii.crc32 and leaves ``zlib`` set to None so the
# rest of the file can test for its absence.
try:
    import zlib
except ImportError:
    zlib = None
    crc32 = binascii.crc32
else:
    crc32 = zlib.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024
# Names exported by ``from zipfile import *``.  "BadZipfile" and "error" are
# older spellings kept for compatibility; they are bound to BadZipFile further
# down in this module.
__all__ = ["BadZipFile", "BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000027
class BadZipFile(Exception):
    """Raised when an archive cannot be processed by this module.

    Within this module it is raised, for example, for archives that span
    multiple disks and for members whose data fails the CRC-32 check.
    """
    pass
Thomas Wouters0e3f5912006-08-11 14:57:12 +000030
31
class LargeZipFile(Exception):
    """
    Raised when writing an archive that would require ZIP64 extensions
    while those extensions are disabled.
    """
37
# Pre-3.2 compatibility names: both are plain aliases of BadZipFile, so code
# that catches either of the older names keeps working.
error = BadZipfile = BadZipFile
39
Guido van Rossum32abe6f2000-03-31 17:30:02 +000040
# Size thresholds.  ZIP64_LIMIT is the largest file/compressed size written
# without the ZIP64 extension; the other two are presumably the entry-count
# threshold and the maximum archive comment length (their users are outside
# this part of the file -- confirm before relying on that).
ZIP64_LIMIT = 0x7FFFFFFF
ZIP_FILECOUNT_LIMIT = 0x10000
ZIP_MAX_COMMENT = 0xFFFF

# Compression method codes understood by this module; other ZIP compression
# methods are not supported.
ZIP_STORED, ZIP_DEFLATED = 0, 8
49
Martin v. Löwisb09b8442008-07-03 14:13:42 +000050# Below are some formats and associated data for reading/writing headers using
51# the struct module. The names and structures of headers/records are those used
52# in the PKWARE description of the ZIP file format:
53# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
54# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000055
# "End of central directory" record (section V.I in the format document):
# struct layout, magic number and packed size.
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields in the unpacked record.  The last two
# (_ECD_COMMENT and _ECD_LOCATION) are not part of the structure as defined
# in the spec; this module appends them to the list as a convenience.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE,
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)
73_ECD_LOCATION = 9
74
# "Central directory" entry (section V.F in the format document):
# struct layout, magic number and packed size.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# Positions of the fields in an unpacked central directory entry.
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)
101
# "Local file header" (section V.A in the format document):
# struct layout, magic number and packed size.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields in an unpacked local file header.
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
120
# "Zip64 end of central directory locator": struct layout, magic number, size.
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# "Zip64 end of central directory" record (section V.G in the format
# document): struct layout, magic number and size.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields in the unpacked Zip64 end-of-central-directory
# record.
(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
142
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    *fp* is an open, seekable binary file object.  An IOError raised while
    looking for the record is treated as "not a ZIP file".
    """
    try:
        found = bool(_EndRecData(fp))   # a record means the magic number matched
    except IOError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000150
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path, or a file or file-like object
    (anything with a ``read`` attribute).  A path that cannot be opened or
    read yields False rather than an exception.
    """
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and make sure it is closed again.
            with open(filename, "rb") as stream:
                return _check_zipfile(stream)
        # Already a file-like object: inspect it directly, leave it open.
        return _check_zipfile(fp=filename)
    except IOError:
        return False
166
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin   -- open, seekable binary file object.
    offset -- position of the classic end-of-central-directory record,
              expressed relative to the end of the file (so it is negative).
    endrec -- list produced from the classic record; it is updated in place
              and returned.

    endrec is returned unchanged when no ZIP64 locator/record is present or
    when the file is too short to hold one.  BadZipFile is raised for
    archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read (some file objects clamp the seek instead of failing):
        # there cannot be a locator here, and unpacking would raise
        # struct.error.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated file: a locator was found but the record it announces
        # does not fit, so keep the classic record.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
204
205
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    fpin is an open, seekable binary file object.  None is returned when no
    complete record can be found, including for files too short to hold
    one."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length test matters for file objects that clamp an out-of-range
    # seek instead of raising: a short buffer must not reach struct.unpack.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # whole record to follow it: the file is truncated or corrupt.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000260
Fred Drake484d7352000-10-02 21:14:52 +0000261
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Describe a member called *filename* last modified at *date_time*.

        filename  -- member name; it is cut at the first null character and
                     os.sep is converted to "/".
        date_time -- (year, month, day, hour, min, sec).

        Raises ValueError if the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self, zip64=None):
        """Return the per-file (local) header as bytes.

        zip64 -- None (default): add the ZIP64 extra record only when one of
                 the sizes exceeds ZIP64_LIMIT; true: always add it; false:
                 never add it.

        Raises LargeZipFile when a size exceeds ZIP64_LIMIT but zip64 is
        false.  When the sizes require ZIP64, extract_version and
        create_version are each raised to at least 45 as a side effect.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Each version field is bumped from its own previous value, so a
            # create_version that is already higher is never lowered.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        ASCII names are stored as-is; any other name is encoded as UTF-8 and
        flag bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 record (type 1) found in self.extra, if any.

        file_size, compress_size and header_offset are replaced by their
        64-bit values when they hold the "see ZIP64 record" marker.
        Raises RuntimeError for a ZIP64 record of unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header; a shorter
        # tail is ignored instead of failing inside struct.unpack.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000409
410
Thomas Wouterscf297e42007-02-23 15:07:44 +0000411class _ZipDecrypter:
412 """Class to handle decryption of files stored within a ZIP archive.
413
414 ZIP supports a password-based form of encryption. Even though known
415 plaintext attacks have been found against it, it is still useful
Christian Heimesfdab48e2008-01-20 09:06:41 +0000416 to be able to get data out of such a file.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000417
418 Usage:
419 zd = _ZipDecrypter(mypwd)
420 plain_char = zd(cypher_char)
421 plain_text = map(zd, cypher_text)
422 """
423
424 def _GenerateCRCTable():
425 """Generate a CRC-32 table.
426
427 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
428 internal keys. We noticed that a direct implementation is faster than
429 relying on binascii.crc32().
430 """
431 poly = 0xedb88320
432 table = [0] * 256
433 for i in range(256):
434 crc = i
435 for j in range(8):
436 if crc & 1:
437 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
438 else:
439 crc = ((crc >> 1) & 0x7FFFFFFF)
440 table[i] = crc
441 return table
442 crctable = _GenerateCRCTable()
443
444 def _crc32(self, ch, crc):
445 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000446 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000447
448 def __init__(self, pwd):
449 self.key0 = 305419896
450 self.key1 = 591751049
451 self.key2 = 878082192
452 for p in pwd:
453 self._UpdateKeys(p)
454
455 def _UpdateKeys(self, c):
456 self.key0 = self._crc32(c, self.key0)
457 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
458 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000459 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000460
461 def __call__(self, c):
462 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000463 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000464 k = self.key2 | 2
465 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000466 self._UpdateKeys(c)
467 return c
468
Ezio Melotti6a5fc4c2012-11-18 13:20:36 +0200469
# Human-readable names of ZIP compression method codes.  ZipExtFile looks a
# code up here to build the message for an unsupported compression type.
compressor_names = {0: 'store', 1: 'shrink'}
# Methods 2-5 share the same name.
compressor_names.update((code, 'reduce') for code in range(2, 6))
compressor_names.update([
    (6, 'implode'),
    (7, 'tokenize'),
    (8, 'deflate'),
    (9, 'deflate64'),
    (10, 'implode'),
    (12, 'bzip2'),
    (14, 'lzma'),
    (18, 'terse'),
    (19, 'lz77'),
    (97, 'wavpack'),
    (98, 'ppmd'),
])
489
490
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000491class ZipExtFile(io.BufferedIOBase):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000492 """File-like object for reading an archive member.
493 Is returned by ZipFile.open().
494 """
495
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000496 # Max size supported by decompressor.
497 MAX_N = 1 << 31 - 1
Guido van Rossumd8faa362007-04-27 19:54:29 +0000498
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000499 # Read from compressed files in 4k blocks.
500 MIN_READ_SIZE = 4096
Guido van Rossumd8faa362007-04-27 19:54:29 +0000501
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000502 # Search for universal newlines or line chunks.
503 PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
504
Łukasz Langae94980a2010-11-22 23:31:26 +0000505 def __init__(self, fileobj, mode, zipinfo, decrypter=None,
506 close_fileobj=False):
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000507 self._fileobj = fileobj
508 self._decrypter = decrypter
Łukasz Langae94980a2010-11-22 23:31:26 +0000509 self._close_fileobj = close_fileobj
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000510
Ezio Melotti92b47432010-01-28 01:44:41 +0000511 self._compress_type = zipinfo.compress_type
512 self._compress_size = zipinfo.compress_size
513 self._compress_left = zipinfo.compress_size
514
515 if self._compress_type == ZIP_DEFLATED:
516 self._decompressor = zlib.decompressobj(-15)
Ezio Melotti6a5fc4c2012-11-18 13:20:36 +0200517 elif self._compress_type != ZIP_STORED:
518 descr = compressor_names.get(self._compress_type)
519 if descr:
520 raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr))
521 else:
522 raise NotImplementedError("compression type %d" % (self._compress_type,))
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000523 self._unconsumed = b''
524
525 self._readbuffer = b''
526 self._offset = 0
527
528 self._universal = 'U' in mode
529 self.newlines = None
530
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000531 # Adjust read size for encrypted files since the first 12 bytes
532 # are for the encryption/password information.
533 if self._decrypter is not None:
534 self._compress_left -= 12
535
536 self.mode = mode
Guido van Rossumd8faa362007-04-27 19:54:29 +0000537 self.name = zipinfo.filename
538
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000539 if hasattr(zipinfo, 'CRC'):
540 self._expected_crc = zipinfo.CRC
541 self._running_crc = crc32(b'') & 0xffffffff
542 else:
543 self._expected_crc = None
544
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000545 def readline(self, limit=-1):
546 """Read and return a line from the stream.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000547
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000548 If limit is specified, at most limit bytes will be read.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000549 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000550
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000551 if not self._universal and limit < 0:
552 # Shortcut common case - newline found in buffer.
553 i = self._readbuffer.find(b'\n', self._offset) + 1
554 if i > 0:
555 line = self._readbuffer[self._offset: i]
556 self._offset = i
557 return line
Guido van Rossumd8faa362007-04-27 19:54:29 +0000558
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000559 if not self._universal:
560 return io.BufferedIOBase.readline(self, limit)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000561
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000562 line = b''
563 while limit < 0 or len(line) < limit:
564 readahead = self.peek(2)
565 if readahead == b'':
566 return line
Guido van Rossumd8faa362007-04-27 19:54:29 +0000567
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000568 #
569 # Search for universal newlines or line chunks.
570 #
571 # The pattern returns either a line chunk or a newline, but not
572 # both. Combined with peek(2), we are assured that the sequence
573 # '\r\n' is always retrieved completely and never split into
574 # separate newlines - '\r', '\n' due to coincidental readaheads.
575 #
576 match = self.PATTERN.search(readahead)
577 newline = match.group('newline')
578 if newline is not None:
579 if self.newlines is None:
580 self.newlines = []
581 if newline not in self.newlines:
582 self.newlines.append(newline)
583 self._offset += len(newline)
584 return line + b'\n'
Guido van Rossumd8faa362007-04-27 19:54:29 +0000585
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000586 chunk = match.group('chunk')
587 if limit >= 0:
588 chunk = chunk[: limit - len(line)]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000589
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000590 self._offset += len(chunk)
591 line += chunk
Guido van Rossumd8faa362007-04-27 19:54:29 +0000592
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000593 return line
594
595 def peek(self, n=1):
596 """Returns buffered bytes without advancing the position."""
597 if n > len(self._readbuffer) - self._offset:
598 chunk = self.read(n)
599 self._offset -= len(chunk)
600
601 # Return up to 512 bytes to reduce allocation overhead for tight loops.
602 return self._readbuffer[self._offset: self._offset + 512]
603
604 def readable(self):
605 return True
606
607 def read(self, n=-1):
608 """Read and return up to n bytes.
609 If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
Guido van Rossumd8faa362007-04-27 19:54:29 +0000610 """
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000611 buf = b''
Antoine Pitrou6464d5f2010-09-12 14:51:20 +0000612 if n is None:
613 n = -1
614 while True:
615 if n < 0:
616 data = self.read1(n)
617 elif n > len(buf):
618 data = self.read1(n - len(buf))
619 else:
620 return buf
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000621 if len(data) == 0:
622 return buf
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000623 buf += data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000624
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000625 def _update_crc(self, newdata, eof):
626 # Update the CRC using the given data.
627 if self._expected_crc is None:
628 # No need to compute the CRC if we don't have a reference value
629 return
630 self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
631 # Check the CRC if we're at the end of the file
632 if eof and self._running_crc != self._expected_crc:
Georg Brandl4d540882010-10-28 06:42:33 +0000633 raise BadZipFile("Bad CRC-32 for file %r" % self.name)
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000634
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000635 def read1(self, n):
636 """Read up to n bytes with at most one read() system call."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000637
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000638 # Simplify algorithm (branching) by transforming negative n to large n.
639 if n < 0 or n is None:
640 n = self.MAX_N
Guido van Rossumd8faa362007-04-27 19:54:29 +0000641
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000642 # Bytes available in read buffer.
643 len_readbuffer = len(self._readbuffer) - self._offset
Guido van Rossumd8faa362007-04-27 19:54:29 +0000644
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000645 # Read from file.
646 if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
647 nbytes = n - len_readbuffer - len(self._unconsumed)
648 nbytes = max(nbytes, self.MIN_READ_SIZE)
649 nbytes = min(nbytes, self._compress_left)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000650
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000651 data = self._fileobj.read(nbytes)
652 self._compress_left -= len(data)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000653
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000654 if data and self._decrypter is not None:
655 data = bytes(map(self._decrypter, data))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000656
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000657 if self._compress_type == ZIP_STORED:
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000658 self._update_crc(data, eof=(self._compress_left==0))
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000659 self._readbuffer = self._readbuffer[self._offset:] + data
660 self._offset = 0
661 else:
662 # Prepare deflated bytes for decompression.
663 self._unconsumed += data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000664
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000665 # Handle unconsumed data.
Ezio Melotti92b47432010-01-28 01:44:41 +0000666 if (len(self._unconsumed) > 0 and n > len_readbuffer and
667 self._compress_type == ZIP_DEFLATED):
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000668 data = self._decompressor.decompress(
669 self._unconsumed,
670 max(n - len_readbuffer, self.MIN_READ_SIZE)
671 )
Guido van Rossumd8faa362007-04-27 19:54:29 +0000672
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000673 self._unconsumed = self._decompressor.unconsumed_tail
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000674 eof = len(self._unconsumed) == 0 and self._compress_left == 0
675 if eof:
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000676 data += self._decompressor.flush()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000677
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000678 self._update_crc(data, eof=eof)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000679 self._readbuffer = self._readbuffer[self._offset:] + data
680 self._offset = 0
681
682 # Read from buffer.
683 data = self._readbuffer[self._offset: self._offset + n]
684 self._offset += len(data)
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000685 return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000686
def close(self):
    """Close the underlying file object if this stream owns it, then
    run the base-class close."""
    try:
        owns_fileobj = self._close_fileobj
        if owns_fileobj:
            self._fileobj.close()
    finally:
        # The base-class close runs even when closing the file object fails.
        super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000693
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000694
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000695class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000696 """ Class with methods to open, read, write, close, list zip files.
697
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000698 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000699
Fred Drake3d9091e2001-03-26 15:49:24 +0000700 file: Either the path to the file, or a file-like object.
701 If it is a path, the file will be opened and closed by ZipFile.
702 mode: The mode can be either read "r", write "w" or append "a".
703 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000704 allowZip64: if True ZipFile will create files with ZIP64 extensions when
705 needed, otherwise it will raise an exception when this would
706 be necessary.
707
Fred Drake3d9091e2001-03-26 15:49:24 +0000708 """
Fred Drake484d7352000-10-02 21:14:52 +0000709
Fred Drake90eac282001-02-28 05:29:34 +0000710 fp = None # Set here since __del__ checks it
711
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a"."""
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    # Validate the requested compression method up front.
    if compression != ZIP_STORED:
        if compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                    "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                  # Level of printing: 0 through 3
    self.NameToInfo = {}            # Maps member name -> ZipInfo
    self.filelist = []              # ZipInfo instances, in archive order
    self.compression = compression  # Default compression method
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self._comment = b''

    if isinstance(file, str):
        # A path was given: this object opens the file itself.
        self._filePassed = 0
        self.filename = file
        open_modes = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = io.open(file, open_modes[mode])
        except IOError:
            if mode != 'a':
                raise
            # Appending but the 'r+b' open failed: fall back to write mode.
            mode = key = 'w'
            self.fp = io.open(file, open_modes[mode])
    else:
        # A file-like object was given: use it as-is.
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    try:
        if key == 'r':
            self._RealGetContents()
        elif key == 'w':
            # Mark as modified so the central directory gets written
            # even if no files are added to the archive.
            self._didModify = True
        elif key == 'a':
            try:
                # If the file is already a zip archive, position at the
                # start of its central directory so it gets overwritten.
                self._RealGetContents()
                self.fp.seek(self.start_dir, 0)
            except BadZipFile:
                # Not a zip file: append at the end of the existing data.
                self.fp.seek(0, 2)

                # Mark as modified so the central directory gets written
                # even if no files are added to the archive.
                self._didModify = True
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')
    except BaseException:
        # Detach the file handle (closing it if we opened it) before
        # letting the error propagate.
        handle = self.fp
        self.fp = None
        if not self._filePassed:
            handle.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000783
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is the target."""
    archive = self
    return archive
786
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    The exception details are ignored and nothing is returned, so an
    in-flight exception keeps propagating.
    """
    self.close()
789
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Populates self.filelist and self.NameToInfo with one ZipInfo per
    central-directory record, and sets self._comment and self.start_dir.

    Raises BadZipFile when the end-of-archive record cannot be found or
    a central-directory record is malformed or truncated.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except IOError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the records from an in-memory copy of the central directory.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # A short record would otherwise surface as struct.error.
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                        t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        # Shift the header offset by the size of any prepended data.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000861
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000862
def namelist(self):
    """Return a new list holding the filename of every archive member,
    in self.filelist order."""
    return [entry.filename for entry in self.filelist]
869
def infolist(self):
    """Return the archive's list of ZipInfo instances.

    This is self.filelist itself, not a copy.
    """
    members = self.filelist
    return members
874
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header row is written, then one row per member with its name,
    modification timestamp and uncompressed size.  Output goes to
    `file`, which is passed straight through to print().
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000883
884 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000885 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000886 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000887 for zinfo in self.filelist:
888 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000889 # Read by chunks, to avoid an OverflowError or a
890 # MemoryError with very large embedded files.
Antoine Pitrou17babc52012-11-17 23:50:08 +0100891 with self.open(zinfo.filename, "r") as f:
892 while f.read(chunk_size): # Check CRC-32
893 pass
Georg Brandl4d540882010-10-28 06:42:33 +0000894 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000895 return zinfo.filename
896
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError when the archive has no such member.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000905
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A falsy value (None or empty bytes) clears the default password.
    Raises TypeError for a truthy value that is not bytes.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000914
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    """Set the archive comment and mark the archive as modified.

    `comment` must be bytes, otherwise TypeError is raised.  A comment
    longer than ZIP_MAX_COMMENT bytes is truncated to that length; a
    notice is printed first when self.debug is set.
    """
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment))
    # Only a comment strictly longer than the limit needs truncating; one
    # of exactly ZIP_MAX_COMMENT bytes fits and must not be reported.
    if len(comment) > ZIP_MAX_COMMENT:
        if self.debug:
            print('Archive comment is too long; truncating to %d bytes'
                  % ZIP_MAX_COMMENT)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    self._didModify = True
932
def read(self, name, pwd=None):
    """Return the complete contents of member 'name' as bytes.

    The member is opened in "r" mode with the given password and closed
    again once it has been read.
    """
    with self.open(name, "r", pwd) as member_file:
        contents = member_file.read()
    return contents
Guido van Rossumd8faa362007-04-27 19:54:29 +0000937
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    'name' is a member name or a ZipInfo object.  'mode' must be "r",
    "U" or "rU".  'pwd' is the password (bytes) for an encrypted member;
    self.pwd is used when it is not given.

    Raises RuntimeError for a bad mode, a closed archive, or a missing or
    wrong password; TypeError when pwd is not bytes; BadZipFile when the
    local file header is malformed, truncated, or names a different file
    than the central directory.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")
        if len(fheader) != sizeFileHeader:
            # A short header would otherwise surface as struct.error.
            raise BadZipFile("Truncated file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, password "
                                   "required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = list(map(zd, header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd,
                          close_fileobj=not self._filePassed)
    except:
        # Close the handle opened above before re-raising; a handle
        # supplied by the caller is left open.
        if not self._filePassed:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001019
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the value of _extract_member().

    `member' may be a filename or a ZipInfo object; a filename is
    resolved through getinfo().  `path' is the directory to extract
    into and defaults to the current working directory.  `pwd' is
    passed on for encrypted members.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
1033
def extractall(self, path=None, members=None, pwd=None):
    """Extract every requested member via extract().

    `path' is the directory to extract to and is handed to extract()
    unchanged.  `members' is optional; when omitted, every name
    returned by namelist() is extracted.  `pwd' is passed on for
    encrypted members.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
1045
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

    The member name is sanitized before it is joined to targetpath:
    an absolute name is treated as relative, any drive prefix is
    dropped, and empty, "." and ".." components are removed, so the
    resulting path stays below targetpath.

    Returns the normalized path that was created.
    """
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # Archive names use forward slashes; convert them (and any
    # alternative separator) to the platform separator.
    arcname = member.filename.replace('/', os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # Member names come from the archive and cannot be trusted: drop the
    # drive prefix and every component that could climb out of targetpath.
    arcname = os.path.splitdrive(arcname)[1]
    unsafe_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(part for part in arcname.split(os.path.sep)
                               if part not in unsafe_parts)

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    # A name ending in '/' denotes a directory entry: nothing to copy.
    if member.filename.endswith('/'):
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    with self.open(member, pwd=pwd) as source, \
         open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1080
def _writecheck(self, zinfo):
    """Validate the archive state and 'zinfo' before a member is written.

    Raises RuntimeError for a wrong archive mode, a closed archive, or
    an unavailable or unsupported compression method, and LargeZipFile
    when ZIP64 would be required but is not allowed.
    """
    # A duplicate name only produces a warning when debugging is on.
    if zinfo.filename in self.NameToInfo and self.debug:
        print("Duplicate name:", zinfo.filename)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    method = zinfo.compress_type
    if method == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if method not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError("That compression method is not supported")
    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile(
            "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001103
def write(self, filename, arcname=None, compress_type=None):
    """Add the file (or directory entry) at 'filename' to the archive.

    'arcname' is the name stored in the archive and defaults to
    'filename'; any drive prefix and leading separators are removed.
    'compress_type' overrides the archive's default compression for
    this member.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    file_stat = os.stat(filename)
    is_directory = stat.S_ISDIR(file_stat.st_mode)
    date_time = time.localtime(file_stat.st_mtime)[0:6]

    # Build the archive name and a ZipInfo describing the member.
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if is_directory:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (file_stat[0] & 0xFFFF) << 16      # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)

    zinfo.file_size = file_stat.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if is_directory:
        # A directory entry is just a header with no data.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        return

    with open(filename, "rb") as source:
        # Placeholder CRC and sizes; the header is rewritten below once
        # the real values are known.
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        # Compressed size can be larger than uncompressed size
        zip64 = (self._allowZip64 and
                 zinfo.file_size * 1.05 > ZIP64_LIMIT)
        self.fp.write(zinfo.FileHeader(zip64))
        cmpr = None
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        file_size = 0
        while True:
            buf = source.read(1024 * 8)
            if not buf:
                break
            file_size += len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size += len(buf)
            self.fp.write(buf)
    if cmpr:
        # Emit whatever the compressor is still holding back.
        tail = cmpr.flush()
        compress_size += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    if not zip64 and self._allowZip64:
        if file_size > ZIP64_LIMIT:
            raise RuntimeError('File size has increased during compressing')
        if compress_size > ZIP64_LIMIT:
            raise RuntimeError('Compressed size larger than uncompressed size')
    # Seek backwards and write file header (which will now include
    # correct CRC and file sizes)
    end_of_data = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset, 0)
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.seek(end_of_data, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1192
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write 'data' into the archive as a single member.

    'data' may be a 'str' or a 'bytes' instance; a 'str' is encoded
    as UTF-8 first.  'zinfo_or_arcname' is either a ZipInfo instance
    or the name of the file in the archive.  'compress_type', when
    given, overrides the member's compression method.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        # Only a name was given: build a ZipInfo stamped with the
        # current local time and the archive's default compression.
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)             # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header data
    if compress_type is not None:
        zinfo.compress_type = compress_type

    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        deflater = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        data = deflater.compress(data) + deflater.flush()
        zinfo.compress_size = len(data)     # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    zip64 = (zinfo.file_size > ZIP64_LIMIT or
             zinfo.compress_size > ZIP64_LIMIT)
    if zip64 and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.write(data)
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        descriptor_fmt = '<LQQ' if zip64 else '<LLL'
        descriptor = struct.pack(descriptor_fmt, zinfo.CRC,
                                 zinfo.compress_size, zinfo.file_size)
        self.fp.write(descriptor)
    self.fp.flush()
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1242
def __del__(self):
    """Close the archive on finalization, in case close() was never
    called explicitly."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001246
1247 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001248 """Close the file, and for mode "w" and "a" write the ending
1249 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001250 if self.fp is None:
1251 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001252
Antoine Pitrou17babc52012-11-17 23:50:08 +01001253 try:
1254 if self.mode in ("w", "a") and self._didModify: # write ending records
1255 count = 0
1256 pos1 = self.fp.tell()
1257 for zinfo in self.filelist: # write central directory
1258 count = count + 1
1259 dt = zinfo.date_time
1260 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1261 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1262 extra = []
1263 if zinfo.file_size > ZIP64_LIMIT \
1264 or zinfo.compress_size > ZIP64_LIMIT:
1265 extra.append(zinfo.file_size)
1266 extra.append(zinfo.compress_size)
1267 file_size = 0xffffffff
1268 compress_size = 0xffffffff
1269 else:
1270 file_size = zinfo.file_size
1271 compress_size = zinfo.compress_size
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001272
Antoine Pitrou17babc52012-11-17 23:50:08 +01001273 if zinfo.header_offset > ZIP64_LIMIT:
1274 extra.append(zinfo.header_offset)
1275 header_offset = 0xffffffff
1276 else:
1277 header_offset = zinfo.header_offset
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001278
Antoine Pitrou17babc52012-11-17 23:50:08 +01001279 extra_data = zinfo.extra
1280 if extra:
1281 # Append a ZIP64 field to the extra's
1282 extra_data = struct.pack(
1283 '<HH' + 'Q'*len(extra),
1284 1, 8*len(extra), *extra) + extra_data
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001285
Antoine Pitrou17babc52012-11-17 23:50:08 +01001286 extract_version = max(45, zinfo.extract_version)
1287 create_version = max(45, zinfo.create_version)
1288 else:
1289 extract_version = zinfo.extract_version
1290 create_version = zinfo.create_version
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001291
Antoine Pitrou17babc52012-11-17 23:50:08 +01001292 try:
1293 filename, flag_bits = zinfo._encodeFilenameFlags()
1294 centdir = struct.pack(structCentralDir,
1295 stringCentralDir, create_version,
1296 zinfo.create_system, extract_version, zinfo.reserved,
1297 flag_bits, zinfo.compress_type, dostime, dosdate,
1298 zinfo.CRC, compress_size, file_size,
1299 len(filename), len(extra_data), len(zinfo.comment),
1300 0, zinfo.internal_attr, zinfo.external_attr,
1301 header_offset)
1302 except DeprecationWarning:
1303 print((structCentralDir, stringCentralDir, create_version,
1304 zinfo.create_system, extract_version, zinfo.reserved,
1305 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1306 zinfo.CRC, compress_size, file_size,
1307 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1308 0, zinfo.internal_attr, zinfo.external_attr,
1309 header_offset), file=sys.stderr)
1310 raise
1311 self.fp.write(centdir)
1312 self.fp.write(filename)
1313 self.fp.write(extra_data)
1314 self.fp.write(zinfo.comment)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001315
Antoine Pitrou17babc52012-11-17 23:50:08 +01001316 pos2 = self.fp.tell()
1317 # Write end-of-zip-archive record
1318 centDirCount = count
1319 centDirSize = pos2 - pos1
1320 centDirOffset = pos1
1321 if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1322 centDirOffset > ZIP64_LIMIT or
1323 centDirSize > ZIP64_LIMIT):
1324 # Need to write the ZIP64 end-of-archive records
1325 zip64endrec = struct.pack(
1326 structEndArchive64, stringEndArchive64,
1327 44, 45, 45, 0, 0, centDirCount, centDirCount,
1328 centDirSize, centDirOffset)
1329 self.fp.write(zip64endrec)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001330
Antoine Pitrou17babc52012-11-17 23:50:08 +01001331 zip64locrec = struct.pack(
1332 structEndArchive64Locator,
1333 stringEndArchive64Locator, 0, pos2, 1)
1334 self.fp.write(zip64locrec)
1335 centDirCount = min(centDirCount, 0xFFFF)
1336 centDirSize = min(centDirSize, 0xFFFFFFFF)
1337 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001338
Antoine Pitrou17babc52012-11-17 23:50:08 +01001339 endrec = struct.pack(structEndArchive, stringEndArchive,
1340 0, 0, centDirCount, centDirCount,
1341 centDirSize, centDirOffset, len(self._comment))
1342 self.fp.write(endrec)
1343 self.fp.write(self._comment)
1344 self.fp.flush()
1345 finally:
1346 fp = self.fp
1347 self.fp = None
1348 if not self._filePassed:
1349 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001350
1351
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=False, optimize=-1):
        """Open the archive like ZipFile and remember the optimize level.

        *optimize* selects which byte-code flavour writepy() stores:
        -1 (the default) uses whatever compiled file is already present,
        0 selects the .pyc flavour and any other value the .pyo flavour.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are normally module.pyo or module.pyc;
        this method compiles module.py when necessary, and if that
        compilation fails the plain module.py is stored instead.

        basename, when non-empty, is the archive directory prefix under
        which the entries are stored.

        Raises RuntimeError if pathname is a file that does not end
        with ".py".
        """
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                # __init__.py has just been written; do not add it twice.
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path (the source path without its ".py"
        suffix), return the file to read from disk and the name to
        store it under in the archive, compiling if necessary.  The
        archive name is always the legacy file name (module.pyc or
        module.pyo), prefixed with "basename/" when basename is
        non-empty, even when the data comes from __pycache__.  If
        compilation fails, both names refer to the .py source.
        """
        def _compile(file, optimize=-1):
            """Byte-compile *file*; return True on success, else False."""
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                # Report the problem and let the caller fall back to
                # storing the source file.
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = imp.cache_from_source(file_py, True)
        pycache_pyo = imp.cache_from_source(file_py, False)

        def _is_fresh(candidate):
            """True if *candidate* exists and is not older than the source."""
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present
            if _is_fresh(file_pyo):
                # Use .pyo file.
                arcname = fname = file_pyo
            elif _is_fresh(file_pyc):
                # Use .pyc file.
                arcname = fname = file_pyc
            elif _is_fresh(pycache_pyc):
                # Use the __pycache__/*.pyc file, but write it to the legacy
                # pyc file name in the archive.
                fname = pycache_pyc
                arcname = file_pyc
            elif _is_fresh(pycache_pyo):
                # Use the __pycache__/*.pyo file, but write it to the legacy
                # pyo file name in the archive.
                fname = pycache_pyo
                arcname = file_pyo
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    fname = (pycache_pyc if __debug__ else pycache_pyo)
                    arcname = (file_pyc if __debug__ else file_pyo)
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_pyc
                arcname = file_pyc
            else:
                fname = pycache_pyo
                arcname = file_pyo
            if not _is_fresh(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001494
1495
def main(args=None):
    """Run the module's small command-line interface.

    args is the list of command-line arguments to interpret; when it
    is None, sys.argv[1:] is used.  The first argument selects the
    action:

        -l zipfile.zip          list the archive's contents
        -t zipfile.zip          test the archive and report a bad member
        -e zipfile.zip target   extract the archive into directory target
        -c zipfile.zip src ...  create the archive from files/directories

    If the action is missing or unknown, or the number of arguments
    does not fit the action, the usage text is printed and the process
    exits with status 1 (SystemExit).
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)

    def usage_error():
        # Shared exit path for every malformed invocation.
        print(USAGE)
        sys.exit(1)

    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_error()

    command = args[0]

    if command == '-l':
        if len(args) != 2:
            usage_error()
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif command == '-t':
        if len(args) != 2:
            usage_error()
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif command == '-e':
        if len(args) != 3:
            usage_error()

        # Delegate to extractall() rather than open()-ing every member:
        # archives may contain directory entries (names ending in "/"),
        # which cannot be opened as regular files.
        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif command == '-c':
        if len(args) < 3:
            usage_error()

        def addToZip(zf, path, zippath):
            # Store regular files; recurse into directories; anything
            # else (missing paths, special files) is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w', allowZip64=True) as zf:
            for src in args[2:]:
                zippath = os.path.basename(src)
                if not zippath:
                    # "dir/" has an empty basename; use the directory's
                    # own name so its files keep their "dir/" prefix.
                    zippath = os.path.basename(os.path.dirname(src))
                if zippath in ('', os.curdir, os.pardir):
                    # "." and ".." are not meaningful archive prefixes.
                    zippath = ''
                addToZip(zf, src, zippath)


# Run the command-line interface when executed as a script.
if __name__ == "__main__":
    main()