blob: 7ee5e2f04655454d7a1a619fa342cef45f5aab7e [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Christian Heimes790c8232008-01-07 21:14:23 +00006import struct, os, time, sys, shutil
Martin v. Löwis59e47792009-01-24 14:10:07 +00007import binascii, io, stat
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import io
9import re
Guido van Rossum32abe6f2000-03-31 17:30:02 +000010
11try:
Tim Peterse1190062001-01-15 03:34:38 +000012 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000013 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000014except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000016 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
Skip Montanaro40fc1602001-03-01 04:27:19 +000018__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Thomas Wouters0e3f5912006-08-11 14:57:12 +000019 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000020
class BadZipfile(Exception):
    """Error raised by this module for archives it cannot process."""


class LargeZipFile(Exception):
    """Signal that ZIP64 is needed but has been disabled.

    Raised while writing a zipfile when the archive requires the ZIP64
    extensions and those extensions are not enabled.
    """


# Historical alias: ``error`` is the exception raised by this module.
error = BadZipfile
Guido van Rossum32abe6f2000-03-31 17:30:02 +000032
# Size/count thresholds used by this module.
ZIP64_LIMIT = 0x7FFFFFFF            # (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 0x10000       # 1 << 16
ZIP_MAX_COMMENT = 0xFFFF            # (1 << 16) - 1

# Identifiers of the ZIP compression methods handled here; every other
# compression method is unsupported.
ZIP_STORED, ZIP_DEFLATED = 0, 8
41
# The formats below (and their associated data) are used to read and write
# headers with the struct module.  Header/record names and layouts follow the
# PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
#     (URL valid as of January 2008)

# "End of central directory" record (section V.I in the format document):
# struct format, magic number and packed size.
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields inside an unpacked "end of central directory"
# record.  The final two positions (_ECD_COMMENT, _ECD_LOCATION) are not part
# of the on-disk structure defined by the spec; this module appends them to
# the unpacked list as a convenience.
(
    _ECD_SIGNATURE,
    _ECD_DISK_NUMBER,
    _ECD_DISK_START,
    _ECD_ENTRIES_THIS_DISK,
    _ECD_ENTRIES_TOTAL,
    _ECD_SIZE,
    _ECD_OFFSET,
    _ECD_COMMENT_SIZE,
    _ECD_COMMENT,
    _ECD_LOCATION,
) = range(10)
66
# "Central directory" file header (section V.F in the format document):
# struct format, magic number and packed size.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# Positions of the fields inside an unpacked central directory entry.
(
    _CD_SIGNATURE,
    _CD_CREATE_VERSION,
    _CD_CREATE_SYSTEM,
    _CD_EXTRACT_VERSION,
    _CD_EXTRACT_SYSTEM,
    _CD_FLAG_BITS,
    _CD_COMPRESS_TYPE,
    _CD_TIME,
    _CD_DATE,
    _CD_CRC,
    _CD_COMPRESSED_SIZE,
    _CD_UNCOMPRESSED_SIZE,
    _CD_FILENAME_LENGTH,
    _CD_EXTRA_FIELD_LENGTH,
    _CD_COMMENT_LENGTH,
    _CD_DISK_NUMBER_START,
    _CD_INTERNAL_FILE_ATTRIBUTES,
    _CD_EXTERNAL_FILE_ATTRIBUTES,
    _CD_LOCAL_HEADER_OFFSET,
) = range(19)
93
# "Local file header" (section V.A in the format document): struct format,
# magic number and packed size.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields inside an unpacked local file header.
(
    _FH_SIGNATURE,
    _FH_EXTRACT_VERSION,
    _FH_EXTRACT_SYSTEM,
    _FH_GENERAL_PURPOSE_FLAG_BITS,
    _FH_COMPRESSION_METHOD,
    _FH_LAST_MOD_TIME,
    _FH_LAST_MOD_DATE,
    _FH_CRC,
    _FH_COMPRESSED_SIZE,
    _FH_UNCOMPRESSED_SIZE,
    _FH_FILENAME_LENGTH,
    _FH_EXTRA_FIELD_LENGTH,
) = range(12)
112
# "Zip64 end of central directory locator": struct format, magic number and
# packed size.
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# "Zip64 end of central directory" record (section V.G in the format
# document): struct format, magic number and packed size.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields inside an unpacked Zip64 end-of-central-directory
# record.
(
    _CD64_SIGNATURE,
    _CD64_DIRECTORY_RECSIZE,
    _CD64_CREATE_VERSION,
    _CD64_EXTRACT_VERSION,
    _CD64_DISK_NUMBER,
    _CD64_DISK_NUMBER_START,
    _CD64_NUMBER_ENTRIES_THIS_DISK,
    _CD64_NUMBER_ENTRIES_TOTAL,
    _CD64_DIRECTORY_SIZE,
    _CD64_OFFSET_START_CENTDIR,
) = range(10)
134
def _check_zipfile(fp):
    """Return True if an "end of central directory" record is found in *fp*.

    *fp* is handed to _EndRecData(); a truthy result means the file carries
    the expected magic number.  An IOError raised while probing the stream is
    swallowed and reported as False.
    """
    try:
        found = bool(_EndRecData(fp))
    except IOError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000142
def is_zipfile(filename):
    """Quickly decide whether *filename* is a ZIP file via its magic number.

    *filename* may be a path, or an already-open file / file-like object
    (anything exposing a ``read`` attribute).  A path is opened in binary
    mode and closed again before returning.  Any IOError (for example a path
    that cannot be opened) yields False.
    """
    is_zip = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and let the context manager close it.
            with open(filename, "rb") as stream:
                is_zip = _check_zipfile(stream)
        else:
            # Already a file-like object: probe it directly.
            is_zip = _check_zipfile(fp=filename)
    except IOError:
        pass
    return is_zip
158
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin   -- seekable binary file object holding the archive.
    offset -- position of the regular "end of central directory" record,
              expressed relative to the end of the file (it is passed to
              seek() with whence=2, so callers supply a negative number).
    endrec -- list produced by _EndRecData(); updated in place when a ZIP64
              record is present.

    Returns endrec.  It is returned unchanged when the file is too small to
    hold the ZIP64 structures or when their signatures do not match.

    Raises BadZipfile if the ZIP64 locator describes a multi-disk archive.
    """
    # The ZIP64 locator, when present, sits immediately before the regular
    # end-of-central-directory record.
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # Seeking before the start of the file: the archive is too small to
        # contain a ZIP64 locator, so there is nothing to merge.
        return endrec
    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: unpacking would raise struct.error.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    try:
        fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    except IOError:
        return endrec
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
190
191
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    fpin is a seekable binary file object.

    The data is a list of the nine items in the ZIP "End of central dir"
    record (the eight packed fields plus the archive comment as bytes)
    followed by a tenth item, the file seek offset of this record.  The list
    is passed through _EndRecData64() so ZIP64 values override the 32-bit
    ones when a ZIP64 record is present.

    None is returned when no valid record can be located.
    """

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check guards struct.unpack against a short read (e.g. a
    # stream smaller than the record on objects that clamp the seek).
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record to follow: not a usable record.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000248
Fred Drake484d7352000-10-02 21:14:52 +0000249
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename  -- member name (str); it is cut at the first NUL character
                     and os.sep is converted to "/".
        date_time -- (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file (local) header as bytes.

        The result is the packed local file header followed by the encoded
        file name and the extra field.  When either size exceeds ZIP64_LIMIT
        a ZIP64 extra record is appended, the 32-bit size fields are set to
        0xffffffff, and extract_version / create_version on this object are
        raised to at least 45.
        """
        dt = self.date_time
        # MS-DOS date/time encoding: seconds are stored with 2s resolution.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Base the result on create_version itself so that an already
            # higher "version made by" value is never lowered.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        ASCII is used when the name allows it; otherwise the name is encoded
        as UTF-8 and flag bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Decode the extra field, applying any ZIP64 record (type 1).

        Sizes/offsets stored as 0xffffffff placeholders are replaced by the
        64-bit values from the ZIP64 record.  Raises RuntimeError when a
        ZIP64 record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Each record starts with a 4-byte (type, length) header; stop when
        # fewer than 4 bytes remain instead of failing in unpack().
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000388
389
class _ZipDecrypter:
    """Decrypt members protected with the password-based ZIP encryption.

    Known plaintext attacks exist against this scheme, but being able to
    read such files is still useful.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Build the 256-entry CRC-32 lookup table.

        The cipher scrambles its internal keys with the one-byte CRC32
        primitive; per the original authors a direct table implementation
        was found to be faster than going through binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for value in range(256):
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= poly
            table.append(value)
        return table
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ch) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        # Fixed initial key values, then mix in every byte of the password.
        self.key0, self.key1, self.key2 = 0x12345678, 0x23456789, 0x34567890
        for byte in pwd:
            self._UpdateKeys(byte)

    def _UpdateKeys(self, c):
        """Advance the three internal keys with the plaintext byte *c*."""
        mask = 0xFFFFFFFF
        k0 = self._crc32(c, self.key0)
        k1 = (self.key1 + (k0 & 0xFF)) & mask
        k1 = (k1 * 0x08088405 + 1) & mask
        self.key0 = k0
        self.key1 = k1
        self.key2 = self._crc32((k1 >> 24) & 0xFF, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        keystream = ((k * (k ^ 1)) >> 8) & 255
        c = c ^ keystream
        self._UpdateKeys(c)
        return c
447
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  Parenthesized on purpose:
    # "1 << 31 - 1" would be evaluated as 1 << 30.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None):
        """Wrap *fileobj*, positioned at the start of the member's data.

        fileobj   -- binary file object to read raw member bytes from.
        mode      -- mode string; a 'U' in it enables universal newlines in
                     readline().
        zipinfo   -- object providing compress_type, compress_size and
                     filename for the member.
        decrypter -- optional callable applied to every raw byte read.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        # Only members that are not simply stored need a decompressor, so
        # stored members stay readable when zlib is unavailable.
        if self._compress_type != ZIP_STORED:
            self._decompressor = zlib.decompressobj(-15)
        else:
            self._decompressor = None
        self._unconsumed = b''

        self._readbuffer = b''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        In universal newlines mode every line ending is returned as b'\\n'
        and the endings seen are collected in self.newlines.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            # Fill the buffer through read(), then rewind over what it
            # consumed so the data stays available.
            chunk = self.read(n)
            self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Report that the stream supports reading (always True)."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        # Normalize first: comparing None with an int raises TypeError.
        if n is None:
            n = -1

        buf = b''
        while n < 0 or n > len(buf):
            # For a bounded read only ask for what is still missing, so the
            # accumulated result can never exceed n bytes.
            data = self.read1(n if n < 0 else n - len(buf))
            if len(data) == 0:
                return buf

            buf += data

        return buf

    def read1(self, n):
        """Read up to n bytes with at most one read() system call.

        A None or negative n is treated as "as much as possible" (MAX_N).
        """

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None is tested first because it cannot be compared with an int.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = bytes(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.  _unconsumed is only ever filled for
        # members that are not stored, so a decompressor exists here.
        if len(self._unconsumed) > 0 and n > len_readbuffer:
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            if len(self._unconsumed) == 0 and self._compress_left == 0:
                data += self._decompressor.flush()

            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000611
612
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000613
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000614class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000615 """ Class with methods to open, read, write, close, list zip files.
616
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000617 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000618
Fred Drake3d9091e2001-03-26 15:49:24 +0000619 file: Either the path to the file, or a file-like object.
620 If it is a path, the file will be opened and closed by ZipFile.
621 mode: The mode can be either read "r", write "w" or append "a".
622 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000623 allowZip64: if True ZipFile will create files with ZIP64 extensions when
624 needed, otherwise it will raise an exception when this would
625 be necessary.
626
Fred Drake3d9091e2001-03-26 15:49:24 +0000627 """
Fred Drake484d7352000-10-02 21:14:52 +0000628
Fred Drake90eac282001-02-28 05:29:34 +0000629 fp = None # Set here since __del__ checks it
630
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file        -- a path (str) or an already opened file-like object.
    mode        -- "r", "w" or "a"; anything else raises RuntimeError.
    compression -- ZIP_STORED or ZIP_DEFLATED (the latter needs zlib).
    allowZip64  -- stored for later use by the write checks.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression != ZIP_STORED:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                  # Level of printing: 0 through 3
    self.NameToInfo = {}            # Find file info given name
    self.filelist = []              # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self.comment = b''

    if not isinstance(file, str):
        # A file-like object was handed in; we do not own it.
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    else:
        # A file name was given; open (and own) the underlying file.
        self._filePassed = 0
        self.filename = file
        open_modes = {'r': 'rb', 'w': 'wb', 'a': 'r+b'}
        try:
            self.fp = io.open(file, open_modes[mode])
        except IOError:
            if mode != 'a':
                raise
            # Appending to a file that cannot be opened for update:
            # fall back to creating it.
            mode = key = 'w'
            self.fp = io.open(file, open_modes[mode])

    if key == 'r':
        self._GetContents()
    elif key == 'a':
        try:
            # See if file is a zip file; if so, position at the start
            # of the central directory so that it gets overwritten.
            self._RealGetContents()
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:
            # Not a zip file: just append at the end.
            self.fp.seek(0, 2)
    elif key != 'w':
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise RuntimeError('Mode must be "r", "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000690
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is the target."""
    return self
693
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block: close the archive, propagate any exception."""
    self.close()
696
def _GetContents(self):
    """Load the archive directory via _RealGetContents().

    If the data turns out not to be a valid zip file, a file that this
    object opened itself is closed before BadZipfile is re-raised.
    """
    try:
        self._RealGetContents()
    except BadZipfile:
        we_opened_it = not self._filePassed
        if we_opened_it:
            self.fp.close()
            self.fp = None
        raise
707
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-archive record, reads the whole central directory
    into memory and creates one ZipInfo per entry, filling self.filelist,
    self.NameToInfo, self.comment and self.start_dir.

    Raises BadZipfile if no end record is found, or if a central
    directory entry has a bad signature or is truncated.
    """
    fp = self.fp
    endrec = _EndRecData(fp)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory rather than from the file.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # A short record would otherwise surface as struct.error,
            # which callers that handle BadZipfile do not expect.
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Field 5 is the general purpose flag word (it is also the value
        # unpacked into x.flag_bits below).
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        x._decodeExtra()
        # Offsets in the directory are relative to the start of the zip
        # data; shift them when the archive was appended to other data.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000776
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000777
def namelist(self):
    """Return a new list with the name of every archive member, in
    directory order."""
    return [entry.filename for entry in self.filelist]
784
def infolist(self):
    """Return the list of ZipInfo instances describing the archive
    members.  This is the internal list itself, not a copy."""
    return self.filelist
789
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header line is followed by one line per member giving its name,
    modification time and uncompressed size.  Output goes to 'file'
    (print()'s default when None).
    """
    heading = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(heading, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000798
799 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000800 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000801 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000802 for zinfo in self.filelist:
803 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000804 # Read by chunks, to avoid an OverflowError or a
805 # MemoryError with very large embedded files.
806 f = self.open(zinfo.filename, "r")
807 while f.read(chunk_size): # Check CRC-32
808 pass
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000809 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000810 return zinfo.filename
811
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError if the archive has no member called 'name'.
    """
    entry = self.NameToInfo.get(name)
    if entry is not None:
        return entry
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000820
def setpassword(self, pwd):
    """Set default password for encrypted files.

    'pwd' must be a bytes object, or None to clear the default.
    Raises TypeError for any other type (an explicit check rather than
    an assert, so it is still enforced under ``python -O``).
    """
    if pwd is not None and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    self.pwd = pwd
825
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    'name' is a member name or ZipInfo; 'pwd' is the password to use for
    an encrypted member.  The member stream is closed before returning,
    also when reading fails.
    """
    member = self.open(name, "r", pwd)
    try:
        return member.read()
    finally:
        member.close()
829
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name -- member name or ZipInfo instance.
    mode -- "r", "U" or "rU".
    pwd  -- password for an encrypted member; defaults to the password
            set with setpassword().

    Raises RuntimeError for a bad mode, a closed archive, a missing or
    wrong password; KeyError for an unknown member name; BadZipfile if
    the local file header is damaged or disagrees with the directory.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")
        if len(fheader) != sizeFileHeader:
            # Report truncation as BadZipfile instead of struct.error
            raise BadZipfile("Truncated file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename.encode("utf-8"):
            # Names stored without the UTF-8 flag use the historical
            # cp437 encoding; only reject when that does not match either.
            if (zinfo.flag_bits & 0x800
                    or fname.decode("cp437") != zinfo.orig_filename):
                raise BadZipfile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            crypt_header = zef_file.read(12)
            if len(crypt_header) != 12:
                raise BadZipfile("Truncated encryption header")
            h = list(map(zd, crypt_header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd)
    except BaseException:
        # Do not leak the handle we opened ourselves when setup fails.
        if not self._filePassed:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000898
def extract(self, member, path=None, pwd=None):
    """Extract a single member and return the path it was written to.

    `member' may be a filename or a ZipInfo object.  The member is
    created under `path' using its full name; when `path' is None the
    current working directory is used.  `pwd' is passed on for
    encrypted members.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
912
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() on each in turn.

    `path' specifies a different directory to extract to (the current
    working directory when None).  `members' is optional and must be a
    subset of the list returned by namelist(); when omitted, every
    member is extracted.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
924
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

    Missing parent directories are created.  A member whose name ends in
    "/" is created as a directory.  Returns the normalised path that was
    written.
    """
    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # don't include leading "/" from file name if present
    if member.filename[0] == '/':
        targetpath = os.path.join(targetpath, member.filename[1:])
    else:
        targetpath = os.path.join(targetpath, member.filename)

    # NOTE(review): ".." components in the member name are not removed,
    # so normpath() can resolve to a location outside the requested
    # directory.  Do not extract untrusted archives without checking
    # member names first.
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    if member.filename[-1] == '/':
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    source = self.open(member, pwd=pwd)
    try:
        # Both handles are released even if opening the target or the
        # copy itself fails.
        with open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)
    finally:
        source.close()

    return targetpath
961
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Raises RuntimeError if the archive is not writable, is closed, or
    the requested compression is unavailable or unknown, and
    LargeZipFile if ZIP64 would be needed but is not allowed.  A
    duplicate member name only produces a message when debug is set.
    """
    duplicate = zinfo.filename in self.NameToInfo
    if duplicate and self.debug:      # Warning for duplicate names
        print("Duplicate name:", zinfo.filename)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    if zinfo.compress_type == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError("That compression method is not supported")
    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile(
            "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000984
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    When 'arcname' is None the file's own path is used, minus any drive
    and leading separators.  'compress_type' overrides the archive's
    default compression for this member.  A directory is stored as an
    empty entry whose name ends in "/".
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    date_time = time.localtime(st.st_mtime)[0:6]

    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)
    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # Directories carry no data: header only.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    with open(filename, "rb") as src:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = running_crc = 0
        zinfo.compress_size = packed_len = 0
        zinfo.file_size = raw_len = 0
        self.fp.write(zinfo.FileHeader())
        compressor = None
        if zinfo.compress_type == ZIP_DEFLATED:
            compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                          zlib.DEFLATED, -15)
        for chunk in iter(lambda: src.read(1024 * 8), b''):
            raw_len += len(chunk)
            running_crc = crc32(chunk, running_crc) & 0xffffffff
            if compressor:
                chunk = compressor.compress(chunk)
                packed_len += len(chunk)
            self.fp.write(chunk)
    if compressor:
        tail = compressor.flush()
        packed_len += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed_len
    else:
        zinfo.compress_size = raw_len
    zinfo.CRC = running_crc
    zinfo.file_size = raw_len
    # Seek backwards and write CRC and file sizes
    resume_at = self.fp.tell()      # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(resume_at, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1065
def writestr(self, zinfo_or_arcname, data):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, the member gets the current local time,
    the archive's default compression and permission bits 0o600.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")

    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        now = time.localtime(time.time())[:6]
        zinfo = ZipInfo(filename=zinfo_or_arcname, date_time=now)
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)             # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header data
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum
    if zinfo.compress_type != ZIP_DEFLATED:
        zinfo.compress_size = zinfo.file_size
    else:
        deflater = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        data = deflater.compress(data) + deflater.flush()
        zinfo.compress_size = len(data)     # Compressed size
    # Position sampled again immediately before the header is written.
    zinfo.header_offset = self.fp.tell()    # Start of header data
    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        descriptor = struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                 zinfo.file_size)
        self.fp.write(descriptor)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1108
def __del__(self):
    """Finalizer: close() the archive in case the user never did."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001112
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing if the archive is already closed.  For a modified
    archive the central directory, the ZIP64 end records (when needed)
    and the end-of-archive record are written first.  The underlying
    file is closed only if this object opened it, and self.fp is reset
    to None even when writing the records fails.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:  # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    # Real sizes go into the ZIP64 extra field; the
                    # fixed-width fields get the 0xffffffff marker.
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the offending values before re-raising.
                    print((structCentralDir, stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset), file=sys.stderr)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # The classic end record can only hold clamped values.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    msg = 'Archive comment is too long; truncating to %d bytes' \
                          % ZIP_MAX_COMMENT
                    print(msg)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Always detach from the file so that a failed write is not
        # retried (for instance from __del__), and never leak a handle
        # that this object opened itself.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1221
1222
1223class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001224 """Class to create ZIP archives with Python library files and packages."""
1225
def writepy(self, pathname, basename=""):
    """Add all files from "pathname" to the ZIP archive.

    If pathname is a package directory, search the directory and
    all package subdirectories recursively for all *.py and enter
    the modules into the archive.  If pathname is a plain
    directory, listdir *.py and enter all modules.  Else, pathname
    must be a Python *.py file and the module will be put into the
    archive.  The file actually stored and its archive name are
    whatever _get_codename() returns for the module.

    Raises RuntimeError if a single file is given that does not end
    in ".py".
    """
    def add_module(stem, *label):
        # Resolve the file for 'stem' (path without ".py") and store it.
        fname, arcname = self._get_codename(stem, basename)
        if self.debug:
            print(*(label + (arcname,)))
        self.write(fname, arcname)

    name = os.path.split(pathname)[1]

    if not os.path.isdir(pathname):
        # A single module file.
        if pathname[-3:] != ".py":
            raise RuntimeError(
                'Files added with writepy() must end with ".py"')
        add_module(pathname[0:-3], "Adding file")
        return

    initname = os.path.join(pathname, "__init__.py")
    if not os.path.isfile(initname):
        # This is NOT a package directory, add its files at top level
        if self.debug:
            print("Adding files from directory", pathname)
        for filename in os.listdir(pathname):
            path = os.path.join(pathname, filename)
            if os.path.splitext(filename)[1] == ".py":
                add_module(path[0:-3], "Adding")
        return

    # This is a package directory, add it
    if basename:
        basename = "%s/%s" % (basename, name)
    else:
        basename = name
    if self.debug:
        print("Adding package in", pathname, "as", basename)
    add_module(initname[0:-3], "Adding")
    dirlist = os.listdir(pathname)
    dirlist.remove("__init__.py")
    # Add all *.py files and package subdirectories
    for filename in dirlist:
        path = os.path.join(pathname, filename)
        if os.path.isdir(path):
            if os.path.isfile(os.path.join(path, "__init__.py")):
                # This is a package directory, add it
                self.writepy(path, basename)  # Recursive call
        elif os.path.splitext(filename)[1] == ".py":
            add_module(path[0:-3], "Adding")
1290
1291 def _get_codename(self, pathname, basename):
1292 """Return (filename, archivename) for the path.
1293
Fred Drake484d7352000-10-02 21:14:52 +00001294 Given a module name path, return the correct file path and
1295 archive name, compiling if necessary. For example, given
1296 /python/lib/string, return (/python/lib/string.pyc, string).
1297 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001298 file_py = pathname + ".py"
1299 file_pyc = pathname + ".pyc"
1300 file_pyo = pathname + ".pyo"
1301 if os.path.isfile(file_pyo) and \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001302 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
Tim Peterse1190062001-01-15 03:34:38 +00001303 fname = file_pyo # Use .pyo file
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001304 elif not os.path.isfile(file_pyc) or \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001305 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
Fred Drake484d7352000-10-02 21:14:52 +00001306 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001307 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001308 print("Compiling", file_py)
Martin v. Löwis0c6774d2003-01-15 11:51:06 +00001309 try:
1310 py_compile.compile(file_py, file_pyc, None, True)
Guido van Rossumb940e112007-01-10 16:19:56 +00001311 except py_compile.PyCompileError as err:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001312 print(err.msg)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001313 fname = file_pyc
1314 else:
1315 fname = file_pyc
1316 archivename = os.path.split(fname)[1]
1317 if basename:
1318 archivename = "%s/%s" % (basename, archivename)
1319 return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001320
1321
def main(args=None):
    """Run the command-line interface of this module.

    args is the list of command-line arguments without the program
    name; when it is None, sys.argv[1:] is used.  The first argument
    selects the action:

        -l zipfile.zip          print a listing of the archive
        -t zipfile.zip          test the archive and report a corrupt member
        -e zipfile.zip target   extract the archive into directory target
        -c zipfile.zip src ...  create the archive from files/directories

    On a usage error the usage text is printed and sys.exit(1) is called.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        # Report the result of the test instead of discarding it.
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(
                badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            # The trailing separator keeps "out" from matching "outside".
            out_root = os.path.join(os.path.abspath(out), '')
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                # Member names come from the archive and are untrusted:
                # refuse absolute names and ".." components that would
                # land outside the target directory.
                # NOTE(review): symlinks already present under the target
                # directory are not resolved by this check.
                resolved = os.path.join(os.path.abspath(tgt), '')
                if not resolved.startswith(out_root):
                    print("Skipping member with unsafe path: {!r}".format(
                        path), file=sys.stderr)
                    continue

                if path.endswith('/'):
                    # Directory entry: create it, there is no data to write.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is '' when extracting a plain name into ''.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively the contents of a directory,
            # to zf under the archive name zippath.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1393
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()