"""
Read and write ZIP files.

XXX references to utf-8 need further investigation.
"""
import binascii
import io
import os
import re
import shutil
import stat
import struct
import sys
import time
Guido van Rossum32abe6f2000-03-31 17:30:02 +000010
try:
    import zlib  # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    # zlib is optional.  Without it ZIP_DEFLATED cannot be used, and CRC-32
    # values are computed with binascii's implementation instead.
    zlib = None
    crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
# Public names exported by ``from zipfile import *``.
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000020
class BadZipfile(Exception):
    """Raised when a file is not a usable ZIP archive.

    This module raises it, for example, for archives that span multiple
    disks, which are not supported.
    """


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


error = BadZipfile      # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000032
# Sizes above this limit make ZipInfo.FileHeader() emit a ZIP64 extra field.
ZIP64_LIMIT = (1 << 31) - 1
# NOTE(review): the two limits below are not used in the visible part of the
# file; presumably they bound the entry count and the archive comment length.
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
41
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
134
def _check_zipfile(fp):
    """Return True if the open file object *fp* looks like a ZIP archive.

    The check succeeds when _EndRecData() finds an "end of central
    directory" record.  An IOError raised while probing the file is treated
    as "not a ZIP file".
    """
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except IOError:
        pass
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000142
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.  Anything
    with a ``read`` attribute is probed directly; any other value is opened
    as a path in binary mode.  Returns False if the file cannot be opened or
    read.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except IOError:
        pass
    return result
158
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is the open archive, offset is the (negative) position of the
    regular "end of central directory" record relative to the end of the
    file, and endrec is the list built by _EndRecData().  endrec is updated
    in place and returned.  It is returned unchanged when no ZIP64 records
    are present.  Raises BadZipfile for archives spanning multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    try:
        fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    except IOError:
        # A locator without room for the record it points at: treat the
        # archive as having no usable ZIP64 information.
        return endrec
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
190
191
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.
    None is returned when no complete, consistent record can be found."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check keeps a short read (file objects that clamp the seek
    # instead of failing) away from struct.unpack.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record: the archive is truncated or corrupt.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000248
Fred Drake484d7352000-10-02 21:14:52 +0000249
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for *filename* with modification time *date_time*.

        date_time is a (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as bytes.

        The result is the packed local file header followed by the encoded
        file name and the extra field.  When either size exceeds ZIP64_LIMIT
        a ZIP64 extra record is appended and extract_version and
        create_version are raised to at least 45 on this object.
        """
        dt = self.date_time
        # MS-DOS date/time encoding: years since 1980, 2-second resolution.
        dosdate = ((dt[0] - 1980) << 9) | (dt[1] << 5) | dt[2]
        dostime = (dt[3] << 11) | (dt[4] << 5) | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value so that a higher
            # creator version is never lowered to extract_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        ASCII names are encoded as ASCII with the flags unchanged; any other
        name is encoded as UTF-8 and flag bit 0x800 is set.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 record (type 1) of the extra field, if present.

        file_size, compress_size and header_offset are replaced, in that
        order, by the 64-bit values from the record whenever they currently
        hold the 0xffffffff placeholder.  Raises RuntimeError when the ZIP64
        record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Each record needs a 4-byte (type, length) header; fewer trailing
        # bytes than that cannot form a record and are ignored.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
387 extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000388
389
Thomas Wouterscf297e42007-02-23 15:07:44 +0000390class _ZipDecrypter:
391 """Class to handle decryption of files stored within a ZIP archive.
392
393 ZIP supports a password-based form of encryption. Even though known
394 plaintext attacks have been found against it, it is still useful
Christian Heimesfdab48e2008-01-20 09:06:41 +0000395 to be able to get data out of such a file.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000396
397 Usage:
398 zd = _ZipDecrypter(mypwd)
399 plain_char = zd(cypher_char)
400 plain_text = map(zd, cypher_text)
401 """
402
403 def _GenerateCRCTable():
404 """Generate a CRC-32 table.
405
406 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
407 internal keys. We noticed that a direct implementation is faster than
408 relying on binascii.crc32().
409 """
410 poly = 0xedb88320
411 table = [0] * 256
412 for i in range(256):
413 crc = i
414 for j in range(8):
415 if crc & 1:
416 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
417 else:
418 crc = ((crc >> 1) & 0x7FFFFFFF)
419 table[i] = crc
420 return table
421 crctable = _GenerateCRCTable()
422
423 def _crc32(self, ch, crc):
424 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000425 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000426
427 def __init__(self, pwd):
428 self.key0 = 305419896
429 self.key1 = 591751049
430 self.key2 = 878082192
431 for p in pwd:
432 self._UpdateKeys(p)
433
434 def _UpdateKeys(self, c):
435 self.key0 = self._crc32(c, self.key0)
436 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
437 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000438 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000439
440 def __call__(self, c):
441 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000442 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000443 k = self.key2 | 2
444 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000445 self._UpdateKeys(c)
446 return c
447
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # Parenthesised: "1 << 31 - 1" would evaluate as 1 << 30.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None):
        """Wrap *fileobj*, positioned at the member's data.

        mode is the mode string the member was opened with; a 'U' in it
        enables universal newline handling in readline().  zipinfo supplies
        compress_type, compress_size and filename.  decrypter, when given,
        is a callable applied to every byte value read from fileobj.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            # Negative window size: raw deflate stream without zlib header.
            self._decompressor = zlib.decompressobj(-15)
        # Compressed bytes read from the file but not yet decompressed.
        self._unconsumed = b''

        self._readbuffer = b''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                # read() refilled the buffer more than once and dropped part
                # of what it returned; put the chunk back in front.
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: this object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        buf = b''
        # Test for None before comparing: "None < 0" raises TypeError.
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                # Only ask for what is still missing so that never more than
                # n bytes are returned.
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = bytes(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            if len(self._unconsumed) == 0 and self._compress_left == 0:
                data += self._decompressor.flush()

            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000613
614
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000615
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000616class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000617 """ Class with methods to open, read, write, close, list zip files.
618
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000619 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000620
Fred Drake3d9091e2001-03-26 15:49:24 +0000621 file: Either the path to the file, or a file-like object.
622 If it is a path, the file will be opened and closed by ZipFile.
623 mode: The mode can be either read "r", write "w" or append "a".
624 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000625 allowZip64: if True ZipFile will create files with ZIP64 extensions when
626 needed, otherwise it will raise an exception when this would
627 be necessary.
628
Fred Drake3d9091e2001-03-26 15:49:24 +0000629 """
Fred Drake484d7352000-10-02 21:14:52 +0000630
Fred Drake90eac282001-02-28 05:29:34 +0000631 fp = None # Set here since __del__ checks it
632
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000633 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000634 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000635 if mode not in ("r", "w", "a"):
636 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
637
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000638 if compression == ZIP_STORED:
639 pass
640 elif compression == ZIP_DEFLATED:
641 if not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +0000642 raise RuntimeError(
643 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000644 else:
Collin Winterce36ad82007-08-30 01:19:48 +0000645 raise RuntimeError("That compression method is not supported")
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000646
647 self._allowZip64 = allowZip64
648 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000649 self.debug = 0 # Level of printing: 0 through 3
650 self.NameToInfo = {} # Find file info given name
651 self.filelist = [] # List of ZipInfo instances for archive
652 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000653 self.mode = key = mode.replace('b', '')[0]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000654 self.pwd = None
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000655 self.comment = b''
Tim Petersa19a1682001-03-29 04:36:09 +0000656
Fred Drake3d9091e2001-03-26 15:49:24 +0000657 # Check if we were passed a file-like object
Guido van Rossum3172c5d2007-10-16 18:12:55 +0000658 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000659 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +0000660 self._filePassed = 0
661 self.filename = file
662 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Thomas Wouterscf297e42007-02-23 15:07:44 +0000663 try:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000664 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000665 except IOError:
666 if mode == 'a':
667 mode = key = 'w'
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000668 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000669 else:
670 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000671 else:
672 self._filePassed = 1
673 self.fp = file
674 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000675
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000676 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000677 self._GetContents()
678 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000679 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000680 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000681 try: # See if file is a zip file
682 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000683 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000684 self.fp.seek(self.start_dir, 0)
685 except BadZipfile: # file is not a zip file, just append
686 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000687 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000688 if not self._filePassed:
689 self.fp.close()
690 self.fp = None
Collin Winterce36ad82007-08-30 01:19:48 +0000691 raise RuntimeError('Mode must be "r", "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000692
Ezio Melottifaa6b7f2009-12-30 12:34:59 +0000693 def __enter__(self):
694 return self
695
696 def __exit__(self, type, value, traceback):
697 self.close()
698
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000699 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000700 """Read the directory, making sure we close the file if the format
701 is bad."""
702 try:
703 self._RealGetContents()
704 except BadZipfile:
705 if not self._filePassed:
706 self.fp.close()
707 self.fp = None
708 raise
709
710 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000711 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000712 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000713 endrec = _EndRecData(fp)
714 if not endrec:
Collin Winterce36ad82007-08-30 01:19:48 +0000715 raise BadZipfile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000716 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000717 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000718 size_cd = endrec[_ECD_SIZE] # bytes in central directory
719 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
720 self.comment = endrec[_ECD_COMMENT] # archive comment
721
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000722 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000723 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +0000724 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
725 # If Zip64 extension structures are present, account for them
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000726 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
727
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000728 if self.debug > 2:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000729 inferred = concat + offset_cd
730 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000731 # self.start_dir: Position of start of central directory
732 self.start_dir = offset_cd + concat
733 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000734 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000735 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000736 total = 0
737 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000738 centdir = fp.read(sizeCentralDir)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000739 if centdir[0:4] != stringCentralDir:
Collin Winterce36ad82007-08-30 01:19:48 +0000740 raise BadZipfile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000741 centdir = struct.unpack(structCentralDir, centdir)
742 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000743 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +0000744 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000745 flags = centdir[5]
746 if flags & 0x800:
747 # UTF-8 file names extension
748 filename = filename.decode('utf-8')
749 else:
750 # Historical ZIP filename encoding
751 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000752 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000753 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000754 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
755 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000756 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000757 (x.create_version, x.create_system, x.extract_version, x.reserved,
758 x.flag_bits, x.compress_type, t, d,
759 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
760 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
761 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +0000762 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000763 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000764 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000765
766 x._decodeExtra()
767 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000768 self.filelist.append(x)
769 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000770
771 # update total bytes read from central directory
772 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
773 + centdir[_CD_EXTRA_FIELD_LENGTH]
774 + centdir[_CD_COMMENT_LENGTH])
775
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000776 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000777 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000778
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000779
780 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000781 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000782 l = []
783 for data in self.filelist:
784 l.append(data.filename)
785 return l
786
787 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000788 """Return a list of class ZipInfo instances for files in the
789 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000790 return self.filelist
791
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000792 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +0000793 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000794 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
795 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000796 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +0000797 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000798 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
799 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000800
801 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000802 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000803 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000804 for zinfo in self.filelist:
805 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000806 # Read by chunks, to avoid an OverflowError or a
807 # MemoryError with very large embedded files.
808 f = self.open(zinfo.filename, "r")
809 while f.read(chunk_size): # Check CRC-32
810 pass
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000811 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000812 return zinfo.filename
813
814 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000815 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000816 info = self.NameToInfo.get(name)
817 if info is None:
818 raise KeyError(
819 'There is no item named %r in the archive' % name)
820
821 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000822
Thomas Wouterscf297e42007-02-23 15:07:44 +0000823 def setpassword(self, pwd):
824 """Set default password for encrypted files."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000825 assert isinstance(pwd, bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000826 self.pwd = pwd
827
828 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000829 """Return file bytes (as a string) for name."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000830 return self.open(name, "r", pwd).read()
831
832 def open(self, name, mode="r", pwd=None):
833 """Return file-like object for 'name'."""
834 if mode not in ("r", "U", "rU"):
Collin Winterce36ad82007-08-30 01:19:48 +0000835 raise RuntimeError('open() requires mode "r", "U", or "rU"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000836 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +0000837 raise RuntimeError(
838 "Attempt to read ZIP archive that was already closed")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000839
Guido van Rossumd8faa362007-04-27 19:54:29 +0000840 # Only open a new file for instances where we were not
841 # given a file object in the constructor
842 if self._filePassed:
843 zef_file = self.fp
844 else:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000845 zef_file = io.open(self.filename, 'rb')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000846
Georg Brandlb533e262008-05-25 18:19:30 +0000847 # Make sure we have an info object
848 if isinstance(name, ZipInfo):
849 # 'name' is already an info object
850 zinfo = name
851 else:
852 # Get info object for name
853 zinfo = self.getinfo(name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000854
855 zef_file.seek(zinfo.header_offset, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000856
857 # Skip the file header:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000858 fheader = zef_file.read(sizeFileHeader)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000859 if fheader[0:4] != stringFileHeader:
Collin Winterce36ad82007-08-30 01:19:48 +0000860 raise BadZipfile("Bad magic number for file header")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000861
862 fheader = struct.unpack(structFileHeader, fheader)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000863 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000864 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000865 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000866
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000867 if fname != zinfo.orig_filename.encode("utf-8"):
Collin Winterce36ad82007-08-30 01:19:48 +0000868 raise BadZipfile(
869 'File name in directory %r and header %r differ.'
870 % (zinfo.orig_filename, fname))
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000871
Guido van Rossumd8faa362007-04-27 19:54:29 +0000872 # check for encrypted flag & handle password
873 is_encrypted = zinfo.flag_bits & 0x1
874 zd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000875 if is_encrypted:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000876 if not pwd:
877 pwd = self.pwd
878 if not pwd:
Collin Winterce36ad82007-08-30 01:19:48 +0000879 raise RuntimeError("File %s is encrypted, "
880 "password required for extraction" % name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000881
Thomas Wouterscf297e42007-02-23 15:07:44 +0000882 zd = _ZipDecrypter(pwd)
883 # The first 12 bytes in the cypher stream is an encryption header
884 # used to strengthen the algorithm. The first 11 bytes are
885 # completely random, while the 12th contains the MSB of the CRC,
Christian Heimesfdab48e2008-01-20 09:06:41 +0000886 # or the MSB of the file time depending on the header type
Thomas Wouterscf297e42007-02-23 15:07:44 +0000887 # and is used to check the correctness of the password.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000888 bytes = zef_file.read(12)
Guido van Rossumc1f779c2007-07-03 08:25:58 +0000889 h = list(map(zd, bytes[0:12]))
Christian Heimesfdab48e2008-01-20 09:06:41 +0000890 if zinfo.flag_bits & 0x8:
891 # compare against the file type from extended local headers
892 check_byte = (zinfo._raw_time >> 8) & 0xff
893 else:
894 # compare against the CRC otherwise
895 check_byte = (zinfo.CRC >> 24) & 0xff
896 if h[11] != check_byte:
897 raise RuntimeError("Bad password for file", name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000898
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000899 return ZipExtFile(zef_file, mode, zinfo, zd)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000900
Christian Heimes790c8232008-01-07 21:14:23 +0000901 def extract(self, member, path=None, pwd=None):
902 """Extract a member from the archive to the current working directory,
903 using its full name. Its file information is extracted as accurately
904 as possible. `member' may be a filename or a ZipInfo object. You can
905 specify a different directory using `path'.
906 """
907 if not isinstance(member, ZipInfo):
908 member = self.getinfo(member)
909
910 if path is None:
911 path = os.getcwd()
912
913 return self._extract_member(member, path, pwd)
914
915 def extractall(self, path=None, members=None, pwd=None):
916 """Extract all members from the archive to the current working
917 directory. `path' specifies a different directory to extract to.
918 `members' is optional and must be a subset of the list returned
919 by namelist().
920 """
921 if members is None:
922 members = self.namelist()
923
924 for zipinfo in members:
925 self.extract(zipinfo, path, pwd)
926
927 def _extract_member(self, member, targetpath, pwd):
928 """Extract the ZipInfo object 'member' to a physical
929 file on the path targetpath.
930 """
931 # build the destination pathname, replacing
932 # forward slashes to platform specific separators.
Antoine Pitrou3c33e082009-05-04 21:21:36 +0000933 # Strip trailing path separator, unless it represents the root.
934 if (targetpath[-1:] in (os.path.sep, os.path.altsep)
935 and len(os.path.splitdrive(targetpath)[1]) > 1):
Christian Heimes790c8232008-01-07 21:14:23 +0000936 targetpath = targetpath[:-1]
937
938 # don't include leading "/" from file name if present
Martin v. Löwis59e47792009-01-24 14:10:07 +0000939 if member.filename[0] == '/':
Christian Heimes790c8232008-01-07 21:14:23 +0000940 targetpath = os.path.join(targetpath, member.filename[1:])
941 else:
942 targetpath = os.path.join(targetpath, member.filename)
943
944 targetpath = os.path.normpath(targetpath)
945
946 # Create all upper directories if necessary.
947 upperdirs = os.path.dirname(targetpath)
948 if upperdirs and not os.path.exists(upperdirs):
949 os.makedirs(upperdirs)
950
Martin v. Löwis59e47792009-01-24 14:10:07 +0000951 if member.filename[-1] == '/':
Martin v. Löwis70ccd162009-05-24 19:47:22 +0000952 if not os.path.isdir(targetpath):
953 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +0000954 return targetpath
955
Georg Brandlb533e262008-05-25 18:19:30 +0000956 source = self.open(member, pwd=pwd)
Christian Heimes790c8232008-01-07 21:14:23 +0000957 target = open(targetpath, "wb")
958 shutil.copyfileobj(source, target)
959 source.close()
960 target.close()
961
962 return targetpath
963
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000964 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000965 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000966 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000967 if self.debug: # Warning for duplicate names
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000968 print("Duplicate name:", zinfo.filename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000969 if self.mode not in ("w", "a"):
Collin Winterce36ad82007-08-30 01:19:48 +0000970 raise RuntimeError('write() requires mode "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000971 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +0000972 raise RuntimeError(
973 "Attempt to write ZIP archive that was already closed")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000974 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +0000975 raise RuntimeError(
976 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000977 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
Collin Winterce36ad82007-08-30 01:19:48 +0000978 raise RuntimeError("That compression method is not supported")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000979 if zinfo.file_size > ZIP64_LIMIT:
980 if not self._allowZip64:
981 raise LargeZipFile("Filesize would require ZIP64 extensions")
982 if zinfo.header_offset > ZIP64_LIMIT:
983 if not self._allowZip64:
Collin Winterce36ad82007-08-30 01:19:48 +0000984 raise LargeZipFile(
985 "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000986
987 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +0000988 """Put the bytes from filename into the archive under the name
989 arcname."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000990 if not self.fp:
991 raise RuntimeError(
992 "Attempt to write to ZIP archive that was already closed")
993
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000994 st = os.stat(filename)
Martin v. Löwis59e47792009-01-24 14:10:07 +0000995 isdir = stat.S_ISDIR(st.st_mode)
Raymond Hettinger32200ae2002-06-01 19:51:15 +0000996 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000997 date_time = mtime[0:6]
998 # Create ZipInfo instance to store file information
999 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001000 arcname = filename
1001 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1002 while arcname[0] in (os.sep, os.altsep):
1003 arcname = arcname[1:]
Martin v. Löwis59e47792009-01-24 14:10:07 +00001004 if isdir:
1005 arcname += '/'
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001006 zinfo = ZipInfo(arcname, date_time)
Guido van Rossume2a383d2007-01-15 16:59:06 +00001007 zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001008 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001009 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001010 else:
Tim Peterse1190062001-01-15 03:34:38 +00001011 zinfo.compress_type = compress_type
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001012
1013 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001014 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +00001015 zinfo.header_offset = self.fp.tell() # Start of header bytes
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001016
1017 self._writecheck(zinfo)
1018 self._didModify = True
Martin v. Löwis59e47792009-01-24 14:10:07 +00001019
1020 if isdir:
1021 zinfo.file_size = 0
1022 zinfo.compress_size = 0
1023 zinfo.CRC = 0
1024 self.filelist.append(zinfo)
1025 self.NameToInfo[zinfo.filename] = zinfo
1026 self.fp.write(zinfo.FileHeader())
1027 return
1028
Benjamin Petersonfa0d7032009-06-01 22:42:33 +00001029 with open(filename, "rb") as fp:
1030 # Must overwrite CRC and sizes with correct data later
1031 zinfo.CRC = CRC = 0
1032 zinfo.compress_size = compress_size = 0
1033 zinfo.file_size = file_size = 0
1034 self.fp.write(zinfo.FileHeader())
1035 if zinfo.compress_type == ZIP_DEFLATED:
1036 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1037 zlib.DEFLATED, -15)
1038 else:
1039 cmpr = None
1040 while 1:
1041 buf = fp.read(1024 * 8)
1042 if not buf:
1043 break
1044 file_size = file_size + len(buf)
1045 CRC = crc32(buf, CRC) & 0xffffffff
1046 if cmpr:
1047 buf = cmpr.compress(buf)
1048 compress_size = compress_size + len(buf)
1049 self.fp.write(buf)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001050 if cmpr:
1051 buf = cmpr.flush()
1052 compress_size = compress_size + len(buf)
1053 self.fp.write(buf)
1054 zinfo.compress_size = compress_size
1055 else:
1056 zinfo.compress_size = file_size
1057 zinfo.CRC = CRC
1058 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001059 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +00001060 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +00001061 self.fp.seek(zinfo.header_offset + 14, 0)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001062 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001063 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +00001064 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001065 self.filelist.append(zinfo)
1066 self.NameToInfo[zinfo.filename] = zinfo
1067
Guido van Rossum85825dc2007-08-27 17:03:28 +00001068 def writestr(self, zinfo_or_arcname, data):
1069 """Write a file into the archive. The contents is 'data', which
1070 may be either a 'str' or a 'bytes' instance; if it is a 'str',
1071 it is encoded as UTF-8 first.
1072 'zinfo_or_arcname' is either a ZipInfo instance or
Just van Rossumb083cb32002-12-12 12:23:32 +00001073 the name of the file in the archive."""
Guido van Rossum85825dc2007-08-27 17:03:28 +00001074 if isinstance(data, str):
1075 data = data.encode("utf-8")
Just van Rossumb083cb32002-12-12 12:23:32 +00001076 if not isinstance(zinfo_or_arcname, ZipInfo):
1077 zinfo = ZipInfo(filename=zinfo_or_arcname,
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001078 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001079 zinfo.compress_type = self.compression
Antoine Pitrou6e1df8d2008-07-25 19:58:18 +00001080 zinfo.external_attr = 0o600 << 16
Just van Rossumb083cb32002-12-12 12:23:32 +00001081 else:
1082 zinfo = zinfo_or_arcname
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001083
1084 if not self.fp:
1085 raise RuntimeError(
1086 "Attempt to write to ZIP archive that was already closed")
1087
Guido van Rossum85825dc2007-08-27 17:03:28 +00001088 zinfo.file_size = len(data) # Uncompressed size
1089 zinfo.header_offset = self.fp.tell() # Start of header data
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001090 self._writecheck(zinfo)
1091 self._didModify = True
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001092 zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001093 if zinfo.compress_type == ZIP_DEFLATED:
1094 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1095 zlib.DEFLATED, -15)
Guido van Rossum85825dc2007-08-27 17:03:28 +00001096 data = co.compress(data) + co.flush()
1097 zinfo.compress_size = len(data) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001098 else:
1099 zinfo.compress_size = zinfo.file_size
Guido van Rossum85825dc2007-08-27 17:03:28 +00001100 zinfo.header_offset = self.fp.tell() # Start of header data
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001101 self.fp.write(zinfo.FileHeader())
Guido van Rossum85825dc2007-08-27 17:03:28 +00001102 self.fp.write(data)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001103 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001104 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001105 # Write CRC and file sizes after the file data
Gregory P. Smithe88749b2009-06-26 08:05:13 +00001106 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001107 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001108 self.filelist.append(zinfo)
1109 self.NameToInfo[zinfo.filename] = zinfo
1110
1111 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001112 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001113 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001114
1115 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001116 """Close the file, and for mode "w" and "a" write the ending
1117 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001118 if self.fp is None:
1119 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001120
1121 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001122 count = 0
1123 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001124 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001125 count = count + 1
1126 dt = zinfo.date_time
1127 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001128 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001129 extra = []
1130 if zinfo.file_size > ZIP64_LIMIT \
1131 or zinfo.compress_size > ZIP64_LIMIT:
1132 extra.append(zinfo.file_size)
1133 extra.append(zinfo.compress_size)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001134 file_size = 0xffffffff
1135 compress_size = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001136 else:
1137 file_size = zinfo.file_size
1138 compress_size = zinfo.compress_size
1139
1140 if zinfo.header_offset > ZIP64_LIMIT:
1141 extra.append(zinfo.header_offset)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001142 header_offset = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001143 else:
1144 header_offset = zinfo.header_offset
1145
1146 extra_data = zinfo.extra
1147 if extra:
1148 # Append a ZIP64 field to the extra's
1149 extra_data = struct.pack(
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001150 '<HH' + 'Q'*len(extra),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001151 1, 8*len(extra), *extra) + extra_data
1152
1153 extract_version = max(45, zinfo.extract_version)
1154 create_version = max(45, zinfo.create_version)
1155 else:
1156 extract_version = zinfo.extract_version
1157 create_version = zinfo.create_version
1158
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001159 try:
1160 filename, flag_bits = zinfo._encodeFilenameFlags()
1161 centdir = struct.pack(structCentralDir,
Ezio Melottiaf30d2e2009-09-25 21:35:24 +00001162 stringCentralDir, create_version,
1163 zinfo.create_system, extract_version, zinfo.reserved,
1164 flag_bits, zinfo.compress_type, dostime, dosdate,
1165 zinfo.CRC, compress_size, file_size,
1166 len(filename), len(extra_data), len(zinfo.comment),
1167 0, zinfo.internal_attr, zinfo.external_attr,
1168 header_offset)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001169 except DeprecationWarning:
Ezio Melottiaf30d2e2009-09-25 21:35:24 +00001170 print((structCentralDir, stringCentralDir, create_version,
1171 zinfo.create_system, extract_version, zinfo.reserved,
1172 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1173 zinfo.CRC, compress_size, file_size,
1174 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1175 0, zinfo.internal_attr, zinfo.external_attr,
1176 header_offset), file=sys.stderr)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001177 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001178 self.fp.write(centdir)
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001179 self.fp.write(filename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001180 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001181 self.fp.write(zinfo.comment)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001182
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001183 pos2 = self.fp.tell()
1184 # Write end-of-zip-archive record
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001185 centDirCount = count
1186 centDirSize = pos2 - pos1
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001187 centDirOffset = pos1
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001188 if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1189 centDirOffset > ZIP64_LIMIT or
1190 centDirSize > ZIP64_LIMIT):
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001191 # Need to write the ZIP64 end-of-archive records
1192 zip64endrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001193 structEndArchive64, stringEndArchive64,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001194 44, 45, 45, 0, 0, centDirCount, centDirCount,
1195 centDirSize, centDirOffset)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001196 self.fp.write(zip64endrec)
1197
1198 zip64locrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001199 structEndArchive64Locator,
1200 stringEndArchive64Locator, 0, pos2, 1)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001201 self.fp.write(zip64locrec)
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001202 centDirCount = min(centDirCount, 0xFFFF)
1203 centDirSize = min(centDirSize, 0xFFFFFFFF)
1204 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001205
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001206 # check for valid comment length
1207 if len(self.comment) >= ZIP_MAX_COMMENT:
1208 if self.debug > 0:
1209 msg = 'Archive comment is too long; truncating to %d bytes' \
1210 % ZIP_MAX_COMMENT
1211 self.comment = self.comment[:ZIP_MAX_COMMENT]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001212
Georg Brandl2ee470f2008-07-16 12:55:28 +00001213 endrec = struct.pack(structEndArchive, stringEndArchive,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001214 0, 0, centDirCount, centDirCount,
1215 centDirSize, centDirOffset, len(self.comment))
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001216 self.fp.write(endrec)
1217 self.fp.write(self.comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001218 self.fp.flush()
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001219
Fred Drake3d9091e2001-03-26 15:49:24 +00001220 if not self._filePassed:
1221 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001222 self.fp = None
1223
1224
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory (it contains __init__.py),
        search the directory and all package subdirectories recursively
        for all *.py and enter the modules into the archive under the
        package name.  If pathname is a plain directory, listdir *.py
        and enter all modules.  Else, pathname must be a Python *.py
        file and the module will be put into the archive.  Added modules
        are always module.pyo or module.pyc.  This method will compile
        the module.py into module.pyc if necessary.

        basename, when given, is prepended (with a "/") to every archive
        name written by this call.

        Raises RuntimeError if pathname is a file not ending in ".py".
        """
        if not os.path.isdir(pathname):
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            self._add_module(pathname, basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] == ".py":
                    self._add_module(os.path.join(pathname, filename),
                                     basename, "Adding")
            return

        # This is a package directory, add it
        name = os.path.basename(pathname)
        if not name:
            # pathname ended with a path separator ("pkg/"); take the
            # package name from the directory part so the package is not
            # archived under an empty prefix.
            name = os.path.basename(os.path.dirname(pathname))
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        self._add_module(initname, basename, "Adding")
        dirlist = os.listdir(pathname)
        # __init__.py has just been added; do not add it twice
        dirlist.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif os.path.splitext(filename)[1] == ".py":
                self._add_module(path, basename, "Adding")

    def _add_module(self, file_py, basename, label):
        """Write the compiled form of the source file file_py to the archive.

        file_py must end in ".py"; basename is the archive prefix and
        label is the text printed before the archive name in debug mode.
        """
        fname, arcname = self._get_codename(file_py[0:-3], basename)
        if self.debug:
            print(label, arcname)
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).
        If basename is non-empty the archive name is "basename/<file>".
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        elif not os.path.isfile(file_pyc) or \
                os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
            # .pyc is missing or older than the source: (re)compile it
            import py_compile
            if self.debug:
                print("Compiling", file_py)
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError as err:
                # Report the compile error; file_pyc is still returned
                print(err.msg)
            fname = file_pyc
        else:
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001322
1323
def main(args = None):
    """Command-line interface: list, test, extract or create a ZIP file.

    args is the argument list (without the program name); when None,
    sys.argv[1:] is used.  The first argument selects the action
    ('-l', '-t', '-e' or '-c').  On a usage error the usage text is
    printed and sys.exit(1) is called.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            # testzip() returns the name of the first bad member, or None
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % badfile)
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            # Absolute target root with a trailing separator, used to
            # check that every member stays inside the target directory.
            out_root = os.path.join(os.path.abspath(out), '')
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                # Member names come from the archive and may be absolute
                # or contain "..": never write outside the target dir.
                tgt_abs = os.path.join(os.path.abspath(tgt), '')
                if not tgt_abs.startswith(out_root):
                    print("Skipping member outside target directory: %r"
                          % path, file=sys.stderr)
                    continue

                tgtdir = os.path.dirname(tgt)
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                if path.endswith('/'):
                    # Directory entry: the directory now exists and
                    # there is no file data to write.
                    continue
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively the contents of a directory;
            # anything else (e.g. a missing path) is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                zippath = os.path.basename(src)
                if not zippath:
                    # "src/" has an empty basename; use the directory name
                    zippath = os.path.basename(os.path.dirname(src))
                addToZip(zf, src, zippath)
        finally:
            zf.close()
1395
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()