blob: bb3d4a22f6cb5cd20d491fca504cb796044cc8d8 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import imp
10import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
18try:
Tim Peterse1190062001-01-15 03:34:38 +000019 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000020 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000021except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000022 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000023 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024
# Public names exported by ``from zipfile import *``.
__all__ = [
    "BadZipFile",
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
    "LargeZipFile",
]
Skip Montanaro40fc1602001-03-01 04:27:19 +000027
class BadZipFile(Exception):
    """Exception raised by this module for invalid or unsupported ZIP data."""
Thomas Wouters0e3f5912006-08-11 14:57:12 +000030
31
class LargeZipFile(Exception):
    """Raised when writing a zipfile that requires ZIP64 extensions
    while those extensions are disabled.
    """
37
# Pre-3.2 compatibility names: both are plain aliases of BadZipFile.
BadZipfile = BadZipFile
error = BadZipFile
39
Guido van Rossum32abe6f2000-03-31 17:30:02 +000040
# Size limits used by this module.
ZIP64_LIMIT = 0x7FFFFFFF            # (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 0x10000       # 1 << 16
ZIP_MAX_COMMENT = 0xFFFF            # (1 << 16) - 1

# Constants for ZIP file compression methods.
# Other ZIP compression methods are not supported.
ZIP_STORED = 0
ZIP_DEFLATED = 8
49
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields unpacked with structEndArchive.
(
    _ECD_SIGNATURE,
    _ECD_DISK_NUMBER,
    _ECD_DISK_START,
    _ECD_ENTRIES_THIS_DISK,
    _ECD_ENTRIES_TOTAL,
    _ECD_SIZE,
    _ECD_OFFSET,
    _ECD_COMMENT_SIZE,
) = range(8)
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT, _ECD_LOCATION = 8, 9
74
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(
    _CD_SIGNATURE,
    _CD_CREATE_VERSION,
    _CD_CREATE_SYSTEM,
    _CD_EXTRACT_VERSION,
    _CD_EXTRACT_SYSTEM,
    _CD_FLAG_BITS,
    _CD_COMPRESS_TYPE,
    _CD_TIME,
    _CD_DATE,
    _CD_CRC,
    _CD_COMPRESSED_SIZE,
    _CD_UNCOMPRESSED_SIZE,
    _CD_FILENAME_LENGTH,
    _CD_EXTRA_FIELD_LENGTH,
    _CD_COMMENT_LENGTH,
    _CD_DISK_NUMBER_START,
    _CD_INTERNAL_FILE_ATTRIBUTES,
    _CD_EXTERNAL_FILE_ATTRIBUTES,
    _CD_LOCAL_HEADER_OFFSET,
) = range(19)
101
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields unpacked with structFileHeader.
(
    _FH_SIGNATURE,
    _FH_EXTRACT_VERSION,
    _FH_EXTRACT_SYSTEM,
    _FH_GENERAL_PURPOSE_FLAG_BITS,
    _FH_COMPRESSION_METHOD,
    _FH_LAST_MOD_TIME,
    _FH_LAST_MOD_DATE,
    _FH_CRC,
    _FH_COMPRESSED_SIZE,
    _FH_UNCOMPRESSED_SIZE,
    _FH_FILENAME_LENGTH,
    _FH_EXTRA_FIELD_LENGTH,
) = range(12)
120
# The "Zip64 end of central directory locator" structure, magic number,
# and size
stringEndArchive64Locator = b"PK\x06\x07"
structEndArchive64Locator = "<4sLQL"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000125
# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields unpacked with structEndArchive64.
(
    _CD64_SIGNATURE,
    _CD64_DIRECTORY_RECSIZE,
    _CD64_CREATE_VERSION,
    _CD64_EXTRACT_VERSION,
    _CD64_DISK_NUMBER,
    _CD64_DISK_NUMBER_START,
    _CD64_NUMBER_ENTRIES_THIS_DISK,
    _CD64_NUMBER_ENTRIES_TOTAL,
    _CD64_DIRECTORY_SIZE,
    _CD64_OFFSET_START_CENTDIR,
) = range(10)
142
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    *fp* is an open, seekable binary file object.  An IOError raised while
    looking for the record is treated as "not a ZIP file".
    """
    try:
        end_record = _EndRecData(fp)
    except IOError:
        return False
    # A non-empty record means the file has the correct magic number.
    return True if end_record else False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000150
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path, or an already open file or
    file-like object (anything with a ``read`` attribute).  Returns False
    when the file cannot be opened or read.
    """
    looks_like_zip = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and close it when done.
            with open(filename, "rb") as stream:
                looks_like_zip = _check_zipfile(stream)
        else:
            looks_like_zip = _check_zipfile(fp=filename)
    except IOError:
        pass
    return looks_like_zip
166
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin   -- open, seekable binary file object.
    offset -- position of the regular "end of central directory" record,
              relative to the end of the file (a negative number).
    endrec -- list built by _EndRecData(); updated in place.

    Returns endrec, unchanged if no ZIP64 locator/record is found.
    Raises BadZipFile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator, so there is no ZIP64
        # record either (struct.unpack would raise on the partial data).
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated record: keep the values from the regular end record.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
204
205
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.
    None is returned when no valid record can be found, including when the
    record is truncated.
    """

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check guards struct.unpack against a short read (possible
    # when the seek above did not fail on a file smaller than the record).
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record: treat the file as not being a valid ZIP.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000262
Fred Drake484d7352000-10-02 21:14:52 +0000263
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry description with default attribute values.

        filename  -- name of the member; truncated at the first null
                     character and normalized to use "/" separators.
        date_time -- tuple (year, month, day, hour, min, sec).
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a bytes object.

        The result is the packed local file header followed by the encoded
        file name and the extra field.  When a size exceeds ZIP64_LIMIT a
        ZIP64 extra record is appended and extract_version/create_version
        are raised to at least 45 on this instance.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value; deriving it from
            # extract_version could lower a higher create_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        ASCII names are returned with flag_bits unchanged; any other name
        is encoded as UTF-8 and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Decode the extra field and apply ZIP64 sizes/offset to self.

        Each record is a '<HH' (type, length) header followed by `length`
        bytes of payload.  Only records of type 1 are interpreted; their
        64-bit values replace file_size, compress_size and header_offset
        (in that order) where those attributes hold the 0xffffffff marker.
        Raises RuntimeError for a type 1 record with an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        while extra:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            # Skip this record's 4-byte header and its payload.
            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000402
403
class _ZipDecrypter:
    """Decrypter for password-protected members of a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Build the 256-entry CRC-32 lookup table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        polynomial = 0xedb88320
        entries = []
        for value in range(256):
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= polynomial
            entries.append(value)
        return entries
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ch) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        """Initialize the three keys and mix in each byte of *pwd*."""
        self.key0, self.key1, self.key2 = 0x12345678, 0x23456789, 0x34567890
        for byte in pwd:
            self._UpdateKeys(byte)

    def _UpdateKeys(self, c):
        """Advance the key state with the plaintext byte value *c*."""
        self.key0 = self._crc32(c, self.key0)
        low = self.key0 & 0xff
        # Both steps are reduced modulo 2**32, so one final mask suffices.
        self.key1 = ((self.key1 + low) * 0x08088405 + 1) & 0xffffffff
        self.key2 = self._crc32((self.key1 >> 24) & 0xff, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        temp = self.key2 | 2
        plain = c ^ (((temp * (temp ^ 1)) >> 8) & 0xff)
        self._UpdateKeys(plain)
        return plain
461
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses matter:
    # "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None):
        """Set up reading of one member.

        fileobj   -- file object to read the member's (compressed) bytes from.
        mode      -- open mode; universal newlines are enabled if it has 'U'.
        zipinfo   -- object providing compress_type, compress_size, filename
                     and, optionally, CRC for the member.
        decrypter -- optional callable applied to each byte read.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        self._unconsumed = b''

        self._readbuffer = b''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                # read() refilled the buffer and dropped part of what it
                # returned; put the chunk back in front of the remainder
                # instead of letting the offset become negative.
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: this object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        buf = b''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC and verify it when eof is true.

        Raises BadZipFile if the final CRC differs from the expected one.
        """
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call.

        A negative n or None means "as much as possible" (up to MAX_N).
        """

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None must be tested first: "None < 0" raises TypeError.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = bytes(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000649
650
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000651
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000652class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000653 """ Class with methods to open, read, write, close, list zip files.
654
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000655 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000656
Fred Drake3d9091e2001-03-26 15:49:24 +0000657 file: Either the path to the file, or a file-like object.
658 If it is a path, the file will be opened and closed by ZipFile.
659 mode: The mode can be either read "r", write "w" or append "a".
660 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000661 allowZip64: if True ZipFile will create files with ZIP64 extensions when
662 needed, otherwise it will raise an exception when this would
663 be necessary.
664
Fred Drake3d9091e2001-03-26 15:49:24 +0000665 """
Fred Drake484d7352000-10-02 21:14:52 +0000666
Fred Drake90eac282001-02-28 05:29:34 +0000667 fp = None # Set here since __del__ checks it
668
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000669 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000670 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000671 if mode not in ("r", "w", "a"):
672 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
673
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000674 if compression == ZIP_STORED:
675 pass
676 elif compression == ZIP_DEFLATED:
677 if not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +0000678 raise RuntimeError(
679 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000680 else:
Collin Winterce36ad82007-08-30 01:19:48 +0000681 raise RuntimeError("That compression method is not supported")
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000682
683 self._allowZip64 = allowZip64
684 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000685 self.debug = 0 # Level of printing: 0 through 3
686 self.NameToInfo = {} # Find file info given name
687 self.filelist = [] # List of ZipInfo instances for archive
688 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000689 self.mode = key = mode.replace('b', '')[0]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000690 self.pwd = None
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000691 self.comment = b''
Tim Petersa19a1682001-03-29 04:36:09 +0000692
Fred Drake3d9091e2001-03-26 15:49:24 +0000693 # Check if we were passed a file-like object
Guido van Rossum3172c5d2007-10-16 18:12:55 +0000694 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000695 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +0000696 self._filePassed = 0
697 self.filename = file
698 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Thomas Wouterscf297e42007-02-23 15:07:44 +0000699 try:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000700 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000701 except IOError:
702 if mode == 'a':
703 mode = key = 'w'
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000704 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000705 else:
706 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000707 else:
708 self._filePassed = 1
709 self.fp = file
710 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000711
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000712 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000713 self._GetContents()
714 elif key == 'w':
Georg Brandl268e4d42010-10-14 06:59:45 +0000715 # set the modified flag so central directory gets written
716 # even if no files are added to the archive
717 self._didModify = True
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000718 elif key == 'a':
Georg Brandl268e4d42010-10-14 06:59:45 +0000719 try:
720 # See if file is a zip file
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000721 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000722 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000723 self.fp.seek(self.start_dir, 0)
Georg Brandl4d540882010-10-28 06:42:33 +0000724 except BadZipFile:
Georg Brandl268e4d42010-10-14 06:59:45 +0000725 # file is not a zip file, just append
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000726 self.fp.seek(0, 2)
Georg Brandl268e4d42010-10-14 06:59:45 +0000727
728 # set the modified flag so central directory gets written
729 # even if no files are added to the archive
730 self._didModify = True
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000731 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000732 if not self._filePassed:
733 self.fp.close()
734 self.fp = None
Collin Winterce36ad82007-08-30 01:19:48 +0000735 raise RuntimeError('Mode must be "r", "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000736
Ezio Melottifaa6b7f2009-12-30 12:34:59 +0000737 def __enter__(self):
738 return self
739
740 def __exit__(self, type, value, traceback):
741 self.close()
742
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000743 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000744 """Read the directory, making sure we close the file if the format
745 is bad."""
746 try:
747 self._RealGetContents()
Georg Brandl4d540882010-10-28 06:42:33 +0000748 except BadZipFile:
Tim Peters7d3bad62001-04-04 18:56:49 +0000749 if not self._filePassed:
750 self.fp.close()
751 self.fp = None
752 raise
753
754 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000755 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000756 fp = self.fp
Georg Brandl268e4d42010-10-14 06:59:45 +0000757 try:
758 endrec = _EndRecData(fp)
759 except IOError:
Georg Brandl4d540882010-10-28 06:42:33 +0000760 raise BadZipFile("File is not a zip file")
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000761 if not endrec:
Georg Brandl4d540882010-10-28 06:42:33 +0000762 raise BadZipFile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000763 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000764 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000765 size_cd = endrec[_ECD_SIZE] # bytes in central directory
766 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
767 self.comment = endrec[_ECD_COMMENT] # archive comment
768
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000769 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000770 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +0000771 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
772 # If Zip64 extension structures are present, account for them
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000773 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
774
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000775 if self.debug > 2:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000776 inferred = concat + offset_cd
777 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000778 # self.start_dir: Position of start of central directory
779 self.start_dir = offset_cd + concat
780 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000781 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000782 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000783 total = 0
784 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000785 centdir = fp.read(sizeCentralDir)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000786 if centdir[0:4] != stringCentralDir:
Georg Brandl4d540882010-10-28 06:42:33 +0000787 raise BadZipFile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000788 centdir = struct.unpack(structCentralDir, centdir)
789 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000790 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +0000791 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000792 flags = centdir[5]
793 if flags & 0x800:
794 # UTF-8 file names extension
795 filename = filename.decode('utf-8')
796 else:
797 # Historical ZIP filename encoding
798 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000799 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000800 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000801 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
802 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000803 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000804 (x.create_version, x.create_system, x.extract_version, x.reserved,
805 x.flag_bits, x.compress_type, t, d,
806 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
807 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
808 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +0000809 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000810 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000811 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000812
813 x._decodeExtra()
814 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000815 self.filelist.append(x)
816 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000817
818 # update total bytes read from central directory
819 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
820 + centdir[_CD_EXTRA_FIELD_LENGTH]
821 + centdir[_CD_COMMENT_LENGTH])
822
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000823 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000824 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000825
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000826
827 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000828 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000829 l = []
830 for data in self.filelist:
831 l.append(data.filename)
832 return l
833
834 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000835 """Return a list of class ZipInfo instances for files in the
836 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000837 return self.filelist
838
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000839 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +0000840 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000841 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
842 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000843 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +0000844 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000845 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
846 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000847
848 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000849 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000850 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000851 for zinfo in self.filelist:
852 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000853 # Read by chunks, to avoid an OverflowError or a
854 # MemoryError with very large embedded files.
855 f = self.open(zinfo.filename, "r")
856 while f.read(chunk_size): # Check CRC-32
857 pass
Georg Brandl4d540882010-10-28 06:42:33 +0000858 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000859 return zinfo.filename
860
861 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000862 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000863 info = self.NameToInfo.get(name)
864 if info is None:
865 raise KeyError(
866 'There is no item named %r in the archive' % name)
867
868 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000869
Thomas Wouterscf297e42007-02-23 15:07:44 +0000870 def setpassword(self, pwd):
871 """Set default password for encrypted files."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000872 assert isinstance(pwd, bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000873 self.pwd = pwd
874
875 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000876 """Return file bytes (as a string) for name."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000877 return self.open(name, "r", pwd).read()
878
879 def open(self, name, mode="r", pwd=None):
880 """Return file-like object for 'name'."""
881 if mode not in ("r", "U", "rU"):
Collin Winterce36ad82007-08-30 01:19:48 +0000882 raise RuntimeError('open() requires mode "r", "U", or "rU"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000883 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +0000884 raise RuntimeError(
885 "Attempt to read ZIP archive that was already closed")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000886
Guido van Rossumd8faa362007-04-27 19:54:29 +0000887 # Only open a new file for instances where we were not
888 # given a file object in the constructor
889 if self._filePassed:
890 zef_file = self.fp
891 else:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000892 zef_file = io.open(self.filename, 'rb')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000893
Georg Brandlb533e262008-05-25 18:19:30 +0000894 # Make sure we have an info object
895 if isinstance(name, ZipInfo):
896 # 'name' is already an info object
897 zinfo = name
898 else:
899 # Get info object for name
900 zinfo = self.getinfo(name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000901
902 zef_file.seek(zinfo.header_offset, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000903
904 # Skip the file header:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000905 fheader = zef_file.read(sizeFileHeader)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000906 if fheader[0:4] != stringFileHeader:
Georg Brandl4d540882010-10-28 06:42:33 +0000907 raise BadZipFile("Bad magic number for file header")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000908
909 fheader = struct.unpack(structFileHeader, fheader)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000910 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000911 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000912 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000913
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000914 if fname != zinfo.orig_filename.encode("utf-8"):
Georg Brandl4d540882010-10-28 06:42:33 +0000915 raise BadZipFile(
Collin Winterce36ad82007-08-30 01:19:48 +0000916 'File name in directory %r and header %r differ.'
917 % (zinfo.orig_filename, fname))
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000918
Guido van Rossumd8faa362007-04-27 19:54:29 +0000919 # check for encrypted flag & handle password
920 is_encrypted = zinfo.flag_bits & 0x1
921 zd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000922 if is_encrypted:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000923 if not pwd:
924 pwd = self.pwd
925 if not pwd:
Collin Winterce36ad82007-08-30 01:19:48 +0000926 raise RuntimeError("File %s is encrypted, "
927 "password required for extraction" % name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000928
Thomas Wouterscf297e42007-02-23 15:07:44 +0000929 zd = _ZipDecrypter(pwd)
930 # The first 12 bytes in the cypher stream is an encryption header
931 # used to strengthen the algorithm. The first 11 bytes are
932 # completely random, while the 12th contains the MSB of the CRC,
Christian Heimesfdab48e2008-01-20 09:06:41 +0000933 # or the MSB of the file time depending on the header type
Thomas Wouterscf297e42007-02-23 15:07:44 +0000934 # and is used to check the correctness of the password.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000935 bytes = zef_file.read(12)
Guido van Rossumc1f779c2007-07-03 08:25:58 +0000936 h = list(map(zd, bytes[0:12]))
Christian Heimesfdab48e2008-01-20 09:06:41 +0000937 if zinfo.flag_bits & 0x8:
938 # compare against the file type from extended local headers
939 check_byte = (zinfo._raw_time >> 8) & 0xff
940 else:
941 # compare against the CRC otherwise
942 check_byte = (zinfo.CRC >> 24) & 0xff
943 if h[11] != check_byte:
944 raise RuntimeError("Bad password for file", name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000945
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000946 return ZipExtFile(zef_file, mode, zinfo, zd)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000947
Christian Heimes790c8232008-01-07 21:14:23 +0000948 def extract(self, member, path=None, pwd=None):
949 """Extract a member from the archive to the current working directory,
950 using its full name. Its file information is extracted as accurately
951 as possible. `member' may be a filename or a ZipInfo object. You can
952 specify a different directory using `path'.
953 """
954 if not isinstance(member, ZipInfo):
955 member = self.getinfo(member)
956
957 if path is None:
958 path = os.getcwd()
959
960 return self._extract_member(member, path, pwd)
961
962 def extractall(self, path=None, members=None, pwd=None):
963 """Extract all members from the archive to the current working
964 directory. `path' specifies a different directory to extract to.
965 `members' is optional and must be a subset of the list returned
966 by namelist().
967 """
968 if members is None:
969 members = self.namelist()
970
971 for zipinfo in members:
972 self.extract(zipinfo, path, pwd)
973
974 def _extract_member(self, member, targetpath, pwd):
975 """Extract the ZipInfo object 'member' to a physical
976 file on the path targetpath.
977 """
978 # build the destination pathname, replacing
979 # forward slashes to platform specific separators.
Antoine Pitrou3c33e082009-05-04 21:21:36 +0000980 # Strip trailing path separator, unless it represents the root.
981 if (targetpath[-1:] in (os.path.sep, os.path.altsep)
982 and len(os.path.splitdrive(targetpath)[1]) > 1):
Christian Heimes790c8232008-01-07 21:14:23 +0000983 targetpath = targetpath[:-1]
984
985 # don't include leading "/" from file name if present
Martin v. Löwis59e47792009-01-24 14:10:07 +0000986 if member.filename[0] == '/':
Christian Heimes790c8232008-01-07 21:14:23 +0000987 targetpath = os.path.join(targetpath, member.filename[1:])
988 else:
989 targetpath = os.path.join(targetpath, member.filename)
990
991 targetpath = os.path.normpath(targetpath)
992
993 # Create all upper directories if necessary.
994 upperdirs = os.path.dirname(targetpath)
995 if upperdirs and not os.path.exists(upperdirs):
996 os.makedirs(upperdirs)
997
Martin v. Löwis59e47792009-01-24 14:10:07 +0000998 if member.filename[-1] == '/':
Martin v. Löwis70ccd162009-05-24 19:47:22 +0000999 if not os.path.isdir(targetpath):
1000 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001001 return targetpath
1002
Georg Brandlb533e262008-05-25 18:19:30 +00001003 source = self.open(member, pwd=pwd)
Christian Heimes790c8232008-01-07 21:14:23 +00001004 target = open(targetpath, "wb")
1005 shutil.copyfileobj(source, target)
1006 source.close()
1007 target.close()
1008
1009 return targetpath
1010
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001011 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +00001012 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +00001013 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +00001014 if self.debug: # Warning for duplicate names
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001015 print("Duplicate name:", zinfo.filename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001016 if self.mode not in ("w", "a"):
Collin Winterce36ad82007-08-30 01:19:48 +00001017 raise RuntimeError('write() requires mode "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001018 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +00001019 raise RuntimeError(
1020 "Attempt to write ZIP archive that was already closed")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001021 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +00001022 raise RuntimeError(
1023 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001024 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
Collin Winterce36ad82007-08-30 01:19:48 +00001025 raise RuntimeError("That compression method is not supported")
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001026 if zinfo.file_size > ZIP64_LIMIT:
1027 if not self._allowZip64:
1028 raise LargeZipFile("Filesize would require ZIP64 extensions")
1029 if zinfo.header_offset > ZIP64_LIMIT:
1030 if not self._allowZip64:
Collin Winterce36ad82007-08-30 01:19:48 +00001031 raise LargeZipFile(
1032 "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001033
1034 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +00001035 """Put the bytes from filename into the archive under the name
1036 arcname."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001037 if not self.fp:
1038 raise RuntimeError(
1039 "Attempt to write to ZIP archive that was already closed")
1040
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001041 st = os.stat(filename)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001042 isdir = stat.S_ISDIR(st.st_mode)
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001043 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001044 date_time = mtime[0:6]
1045 # Create ZipInfo instance to store file information
1046 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001047 arcname = filename
1048 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1049 while arcname[0] in (os.sep, os.altsep):
1050 arcname = arcname[1:]
Martin v. Löwis59e47792009-01-24 14:10:07 +00001051 if isdir:
1052 arcname += '/'
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001053 zinfo = ZipInfo(arcname, date_time)
Guido van Rossume2a383d2007-01-15 16:59:06 +00001054 zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001055 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001056 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001057 else:
Tim Peterse1190062001-01-15 03:34:38 +00001058 zinfo.compress_type = compress_type
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001059
1060 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001061 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +00001062 zinfo.header_offset = self.fp.tell() # Start of header bytes
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001063
1064 self._writecheck(zinfo)
1065 self._didModify = True
Martin v. Löwis59e47792009-01-24 14:10:07 +00001066
1067 if isdir:
1068 zinfo.file_size = 0
1069 zinfo.compress_size = 0
1070 zinfo.CRC = 0
1071 self.filelist.append(zinfo)
1072 self.NameToInfo[zinfo.filename] = zinfo
1073 self.fp.write(zinfo.FileHeader())
1074 return
1075
Benjamin Petersonfa0d7032009-06-01 22:42:33 +00001076 with open(filename, "rb") as fp:
1077 # Must overwrite CRC and sizes with correct data later
1078 zinfo.CRC = CRC = 0
1079 zinfo.compress_size = compress_size = 0
1080 zinfo.file_size = file_size = 0
1081 self.fp.write(zinfo.FileHeader())
1082 if zinfo.compress_type == ZIP_DEFLATED:
1083 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1084 zlib.DEFLATED, -15)
1085 else:
1086 cmpr = None
1087 while 1:
1088 buf = fp.read(1024 * 8)
1089 if not buf:
1090 break
1091 file_size = file_size + len(buf)
1092 CRC = crc32(buf, CRC) & 0xffffffff
1093 if cmpr:
1094 buf = cmpr.compress(buf)
1095 compress_size = compress_size + len(buf)
1096 self.fp.write(buf)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001097 if cmpr:
1098 buf = cmpr.flush()
1099 compress_size = compress_size + len(buf)
1100 self.fp.write(buf)
1101 zinfo.compress_size = compress_size
1102 else:
1103 zinfo.compress_size = file_size
1104 zinfo.CRC = CRC
1105 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001106 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +00001107 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +00001108 self.fp.seek(zinfo.header_offset + 14, 0)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001109 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001110 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +00001111 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001112 self.filelist.append(zinfo)
1113 self.NameToInfo[zinfo.filename] = zinfo
1114
Ronald Oussorenee5c8852010-02-07 20:24:02 +00001115 def writestr(self, zinfo_or_arcname, data, compress_type=None):
Guido van Rossum85825dc2007-08-27 17:03:28 +00001116 """Write a file into the archive. The contents is 'data', which
1117 may be either a 'str' or a 'bytes' instance; if it is a 'str',
1118 it is encoded as UTF-8 first.
1119 'zinfo_or_arcname' is either a ZipInfo instance or
Just van Rossumb083cb32002-12-12 12:23:32 +00001120 the name of the file in the archive."""
Guido van Rossum85825dc2007-08-27 17:03:28 +00001121 if isinstance(data, str):
1122 data = data.encode("utf-8")
Just van Rossumb083cb32002-12-12 12:23:32 +00001123 if not isinstance(zinfo_or_arcname, ZipInfo):
1124 zinfo = ZipInfo(filename=zinfo_or_arcname,
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001125 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001126 zinfo.compress_type = self.compression
Antoine Pitrou6e1df8d2008-07-25 19:58:18 +00001127 zinfo.external_attr = 0o600 << 16
Just van Rossumb083cb32002-12-12 12:23:32 +00001128 else:
1129 zinfo = zinfo_or_arcname
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001130
1131 if not self.fp:
1132 raise RuntimeError(
1133 "Attempt to write to ZIP archive that was already closed")
1134
Guido van Rossum85825dc2007-08-27 17:03:28 +00001135 zinfo.file_size = len(data) # Uncompressed size
1136 zinfo.header_offset = self.fp.tell() # Start of header data
Ronald Oussorenee5c8852010-02-07 20:24:02 +00001137 if compress_type is not None:
1138 zinfo.compress_type = compress_type
1139
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001140 self._writecheck(zinfo)
1141 self._didModify = True
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001142 zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001143 if zinfo.compress_type == ZIP_DEFLATED:
1144 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1145 zlib.DEFLATED, -15)
Guido van Rossum85825dc2007-08-27 17:03:28 +00001146 data = co.compress(data) + co.flush()
1147 zinfo.compress_size = len(data) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001148 else:
1149 zinfo.compress_size = zinfo.file_size
Guido van Rossum85825dc2007-08-27 17:03:28 +00001150 zinfo.header_offset = self.fp.tell() # Start of header data
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001151 self.fp.write(zinfo.FileHeader())
Guido van Rossum85825dc2007-08-27 17:03:28 +00001152 self.fp.write(data)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001153 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001154 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001155 # Write CRC and file sizes after the file data
Gregory P. Smithe88749b2009-06-26 08:05:13 +00001156 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001157 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001158 self.filelist.append(zinfo)
1159 self.NameToInfo[zinfo.filename] = zinfo
1160
1161 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001162 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001163 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001164
1165 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001166 """Close the file, and for mode "w" and "a" write the ending
1167 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001168 if self.fp is None:
1169 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001170
1171 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001172 count = 0
1173 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001174 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001175 count = count + 1
1176 dt = zinfo.date_time
1177 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001178 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001179 extra = []
1180 if zinfo.file_size > ZIP64_LIMIT \
1181 or zinfo.compress_size > ZIP64_LIMIT:
1182 extra.append(zinfo.file_size)
1183 extra.append(zinfo.compress_size)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001184 file_size = 0xffffffff
1185 compress_size = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001186 else:
1187 file_size = zinfo.file_size
1188 compress_size = zinfo.compress_size
1189
1190 if zinfo.header_offset > ZIP64_LIMIT:
1191 extra.append(zinfo.header_offset)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001192 header_offset = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001193 else:
1194 header_offset = zinfo.header_offset
1195
1196 extra_data = zinfo.extra
1197 if extra:
1198 # Append a ZIP64 field to the extra's
1199 extra_data = struct.pack(
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001200 '<HH' + 'Q'*len(extra),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001201 1, 8*len(extra), *extra) + extra_data
1202
1203 extract_version = max(45, zinfo.extract_version)
1204 create_version = max(45, zinfo.create_version)
1205 else:
1206 extract_version = zinfo.extract_version
1207 create_version = zinfo.create_version
1208
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001209 try:
1210 filename, flag_bits = zinfo._encodeFilenameFlags()
1211 centdir = struct.pack(structCentralDir,
Ezio Melottiaf30d2e2009-09-25 21:35:24 +00001212 stringCentralDir, create_version,
1213 zinfo.create_system, extract_version, zinfo.reserved,
1214 flag_bits, zinfo.compress_type, dostime, dosdate,
1215 zinfo.CRC, compress_size, file_size,
1216 len(filename), len(extra_data), len(zinfo.comment),
1217 0, zinfo.internal_attr, zinfo.external_attr,
1218 header_offset)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001219 except DeprecationWarning:
Ezio Melottiaf30d2e2009-09-25 21:35:24 +00001220 print((structCentralDir, stringCentralDir, create_version,
1221 zinfo.create_system, extract_version, zinfo.reserved,
1222 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1223 zinfo.CRC, compress_size, file_size,
1224 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1225 0, zinfo.internal_attr, zinfo.external_attr,
1226 header_offset), file=sys.stderr)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001227 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001228 self.fp.write(centdir)
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001229 self.fp.write(filename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001230 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001231 self.fp.write(zinfo.comment)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001232
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001233 pos2 = self.fp.tell()
1234 # Write end-of-zip-archive record
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001235 centDirCount = count
1236 centDirSize = pos2 - pos1
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001237 centDirOffset = pos1
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001238 if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1239 centDirOffset > ZIP64_LIMIT or
1240 centDirSize > ZIP64_LIMIT):
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001241 # Need to write the ZIP64 end-of-archive records
1242 zip64endrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001243 structEndArchive64, stringEndArchive64,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001244 44, 45, 45, 0, 0, centDirCount, centDirCount,
1245 centDirSize, centDirOffset)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001246 self.fp.write(zip64endrec)
1247
1248 zip64locrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001249 structEndArchive64Locator,
1250 stringEndArchive64Locator, 0, pos2, 1)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001251 self.fp.write(zip64locrec)
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001252 centDirCount = min(centDirCount, 0xFFFF)
1253 centDirSize = min(centDirSize, 0xFFFFFFFF)
1254 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001255
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001256 # check for valid comment length
1257 if len(self.comment) >= ZIP_MAX_COMMENT:
1258 if self.debug > 0:
1259 msg = 'Archive comment is too long; truncating to %d bytes' \
1260 % ZIP_MAX_COMMENT
1261 self.comment = self.comment[:ZIP_MAX_COMMENT]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001262
Georg Brandl2ee470f2008-07-16 12:55:28 +00001263 endrec = struct.pack(structEndArchive, stringEndArchive,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001264 0, 0, centDirCount, centDirCount,
1265 centDirSize, centDirOffset, len(self.comment))
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001266 self.fp.write(endrec)
1267 self.fp.write(self.comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001268 self.fp.flush()
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001269
Fred Drake3d9091e2001-03-26 15:49:24 +00001270 if not self._filePassed:
1271 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001272 self.fp = None
1273
1274
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary; if compilation fails, the module.py source itself is
        added instead.

        basename is the archive-internal directory prefix ("" for the
        archive root); package names are appended to it on recursion.

        Raises RuntimeError if pathname is a plain file that does not
        end with ".py".
        """
        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # Single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if os.path.isfile(initname):
            # This is a package directory, add it
            if basename:
                basename = "%s/%s" % (basename, name)
            else:
                basename = name
            if self.debug:
                print("Adding package in", pathname, "as", basename)
            fname, arcname = self._get_codename(initname[0:-3], basename)
            if self.debug:
                print("Adding", arcname)
            self.write(fname, arcname)
            dirlist = os.listdir(pathname)
            # __init__ has just been written; do not add it twice.
            dirlist.remove("__init__.py")
            # Add all *.py files and package subdirectories
            for filename in dirlist:
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if os.path.isdir(path):
                    if os.path.isfile(os.path.join(path, "__init__.py")):
                        # This is a package directory, add it
                        self.writepy(path, basename)  # Recursive call
                elif ext == ".py":
                    fname, arcname = self._get_codename(path[0:-3],
                                                        basename)
                    if self.debug:
                        print("Adding", arcname)
                    self.write(fname, arcname)
        else:
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if ext == ".py":
                    fname, arcname = self._get_codename(path[0:-3],
                                                        basename)
                    if self.debug:
                        print("Adding", arcname)
                    self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc);
        with a non-empty basename the archive name is prefixed with
        "basename/".

        An up-to-date legacy .pyo/.pyc next to the source is preferred,
        then an up-to-date __pycache__ file (stored under the legacy
        name).  Otherwise the source is compiled; if compilation fails
        the error message is printed and the .py source itself is
        returned for archiving.
        """
        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = imp.cache_from_source(file_py, True)
        pycache_pyo = imp.cache_from_source(file_py, False)
        if (os.path.isfile(file_pyo) and
            os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime):
            # Use .pyo file.
            arcname = fname = file_pyo
        elif (os.path.isfile(file_pyc) and
              os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
            # Use .pyc file.
            arcname = fname = file_pyc
        elif (os.path.isfile(pycache_pyc) and
              os.stat(pycache_pyc).st_mtime >= os.stat(file_py).st_mtime):
            # Use the __pycache__/*.pyc file, but write it to the legacy pyc
            # file name in the archive.
            fname = pycache_pyc
            arcname = file_pyc
        elif (os.path.isfile(pycache_pyo) and
              os.stat(pycache_pyo).st_mtime >= os.stat(file_py).st_mtime):
            # Use the __pycache__/*.pyo file, but write it to the legacy pyo
            # file name in the archive.
            fname = pycache_pyo
            arcname = file_pyo
        else:
            # Compile py into PEP 3147 pyc file.
            import py_compile
            if self.debug:
                print("Compiling", file_py)
            try:
                py_compile.compile(file_py, doraise=True)
            except py_compile.PyCompileError as err:
                print(err.msg)
                # Compilation failed: fall back to archiving the source
                # under its own name so arcname is always defined.
                fname = arcname = file_py
            else:
                fname = (pycache_pyc if __debug__ else pycache_pyo)
                arcname = (file_pyc if __debug__ else file_pyo)
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001392
1393
def main(args = None):
    """Command-line entry point for listing, testing, extracting, creating.

    args is the argument list without the program name; when None,
    sys.argv[1:] is used.  The first argument selects the mode
    ('-l', '-t', '-e' or '-c').  On a missing/unknown mode or a wrong
    number of arguments the usage text is printed and sys.exit(1) is
    called.  The opened archive is always closed, even if an operation
    raises.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            # The archive was previously left open in this mode.
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # Directory entry: create it, there is no data to write.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # An empty tgtdir means "current directory"; makedirs('')
                # would raise.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Recursively add a file or a directory tree; anything that is
            # neither a regular file nor a directory is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1467
# Run the command-line interface only when executed as a script;
# main() reads its arguments from sys.argv[1:] when called without any.
if __name__ == "__main__":
    main()