blob: 46ec6ef065e56668211e787c2dbdd7ae3a022b6c [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Christian Heimes790c8232008-01-07 21:14:23 +00006import struct, os, time, sys, shutil
Guido van Rossum68937b42007-05-18 00:51:22 +00007import binascii, io
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
9try:
Tim Peterse1190062001-01-15 03:34:38 +000010 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000011 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000012except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000014 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
# Public names exported by a star-import of this module.
__all__ = [
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
    "LargeZipFile",
]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
class BadZipfile(Exception):
    """The exception raised by this module for invalid ZIP archives."""


class LargeZipFile(Exception):
    """Raised when writing a zipfile that requires ZIP64 extensions
    while those extensions are disabled.
    """


# Alias: the exception raised by this module.
error = BadZipfile
Guido van Rossum32abe6f2000-03-31 17:30:02 +000030
# Sizes above this value are written using the ZIP64 extension
# (see ZipInfo.FileHeader).
ZIP64_LIMIT = 0x7FFFFFFF
# NOTE(review): presumably the entry-count threshold for ZIP64 archives and
# the largest archive comment a 16-bit length field can describe -- confirm
# against the code that uses them.
ZIP_FILECOUNT_LIMIT = 0x10000
ZIP_MAX_COMMENT = 0xFFFF

# Constants for ZIP file compression methods.
# Other ZIP compression methods are not supported.
ZIP_STORED = 0
ZIP_DEFLATED = 8
39
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those
# used in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)
#
# Each group defines the struct format, the magic number (signature), the
# packed size, and the index of every field in the unpacked tuple.

# The "end of central directory" structure
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

(
    _ECD_SIGNATURE,
    _ECD_DISK_NUMBER,
    _ECD_DISK_START,
    _ECD_ENTRIES_THIS_DISK,
    _ECD_ENTRIES_TOTAL,
    _ECD_SIZE,
    _ECD_OFFSET,
    _ECD_COMMENT_SIZE,
    # These last two indices are not part of the structure as defined in the
    # spec, but they are used internally by this module as a convenience
    _ECD_COMMENT,
    _ECD_LOCATION,
) = range(10)

# The "central directory" structure
# (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(
    _CD_SIGNATURE,
    _CD_CREATE_VERSION,
    _CD_CREATE_SYSTEM,
    _CD_EXTRACT_VERSION,
    _CD_EXTRACT_SYSTEM,
    _CD_FLAG_BITS,
    _CD_COMPRESS_TYPE,
    _CD_TIME,
    _CD_DATE,
    _CD_CRC,
    _CD_COMPRESSED_SIZE,
    _CD_UNCOMPRESSED_SIZE,
    _CD_FILENAME_LENGTH,
    _CD_EXTRA_FIELD_LENGTH,
    _CD_COMMENT_LENGTH,
    _CD_DISK_NUMBER_START,
    _CD_INTERNAL_FILE_ATTRIBUTES,
    _CD_EXTERNAL_FILE_ATTRIBUTES,
    _CD_LOCAL_HEADER_OFFSET,
) = range(19)

# The "local file header" structure
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

(
    _FH_SIGNATURE,
    _FH_EXTRACT_VERSION,
    _FH_EXTRACT_SYSTEM,
    _FH_GENERAL_PURPOSE_FLAG_BITS,
    _FH_COMPRESSION_METHOD,
    _FH_LAST_MOD_TIME,
    _FH_LAST_MOD_DATE,
    _FH_CRC,
    _FH_COMPRESSED_SIZE,
    _FH_UNCOMPRESSED_SIZE,
    _FH_FILENAME_LENGTH,
    _FH_EXTRA_FIELD_LENGTH,
) = range(12)

# The "Zip64 end of central directory locator" structure
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

(
    _CD64_SIGNATURE,
    _CD64_DIRECTORY_RECSIZE,
    _CD64_CREATE_VERSION,
    _CD64_EXTRACT_VERSION,
    _CD64_DISK_NUMBER,
    _CD64_DISK_NUMBER_START,
    _CD64_NUMBER_ENTRIES_THIS_DISK,
    _CD64_NUMBER_ENTRIES_TOTAL,
    _CD64_DIRECTORY_SIZE,
    _CD64_OFFSET_START_CENTDIR,
) = range(10)
132
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Opens the file named by *filename* in binary mode and looks for an
    "end of central directory" record.

    Returns True when such a record is found.  Returns False otherwise,
    including when the file cannot be opened or read (IOError).
    """
    try:
        # The context manager guarantees the file is closed even when
        # reading the end record raises.
        with io.open(filename, "rb") as fpin:
            endrec = _EndRecData(fpin)
    except IOError:
        return False
    # A found record means the file has the correct magic number.
    return bool(endrec)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000144
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin   -- seekable binary file object positioned anywhere.
    offset -- position of the regular "end of central directory" record,
              expressed relative to the end of the file (so it is negative).
    endrec -- list produced by _EndRecData(); updated in place.

    Returns endrec.  It is returned unchanged when no valid ZIP64 locator or
    record is present, including when the file is too small to hold them.
    Raises BadZipfile for archives that span multiple disks.
    """
    # The locator sits immediately before the regular end record.
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # The file is not large enough to contain a ZIP64 locator.
        return endrec
    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data': the ZIP64 record is then located
    # immediately before the locator.
    try:
        fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    except IOError:
        # The file is not large enough to contain the ZIP64 record.
        return endrec
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
176
177
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    fpin is a seekable binary file object.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.
    None is returned when no valid record can be found (including when the
    file is too small to hold one)."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        # File is shorter than an end record, so it cannot be a ZIP file.
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)
        if endrec[_ECD_OFFSET] == 0xffffffff:
            # the value for the "offset of the start of the central directory"
            # indicates that there is a "Zip64 end of central directory"
            # structure present, so go look for it
            return _EndRecData64(fpin, -sizeEndCentDir, endrec)

        return endrec

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The record is truncated: the file is corrupt.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)
            if endrec[_ECD_OFFSET] == 0xffffffff:
                # There is apparently a "Zip64 end of central directory"
                # structure present, so go look for it.  `start` is relative
                # to maxCommentStart, so convert the record's absolute
                # position into an offset from the end of the file.
                return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                     endrec)
            return endrec

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000236
Fred Drake484d7352000-10-02 21:14:52 +0000237
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename  -- member name; truncated at the first null character and
                     normalized to use "/" as the directory separator.
        date_time -- tuple (year, month, day, hour, min, sec).
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as bytes.

        The result is the packed local file header followed by the encoded
        file name and the extra field.  When either size exceeds ZIP64_LIMIT
        a ZIP64 extra record is appended and extract_version/create_version
        are raised to at least 45 on this object as a side effect.
        """
        dt = self.date_time
        # Pack the timestamp into the 16-bit DOS date and time fields
        # (seconds are stored with two-second resolution).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            # 0xffffffff in the header means "real value is in the extra"
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value so that a higher
            # existing create_version is not overwritten.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing headers.

        The name is encoded as ASCII when possible; otherwise it is encoded
        as UTF-8 and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Decode the extra field and apply any ZIP64 record (type 1).

        file_size, compress_size and header_offset are replaced, in that
        order, by values from the ZIP64 record when they hold the
        0xffffffff placeholder.  Raises RuntimeError when the ZIP64 record
        has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while extra:
            # Each record starts with a 16-bit type and a 16-bit data length.
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            # Skip to the next record.
            extra = extra[ln+4:]
375 extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000376
377
class _ZipDecrypter:
    """Decrypt data stored with the password-based ZIP encryption.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        polynomial = 0xedb88320
        entries = []
        for value in range(256):
            # Shift out eight bits, folding in the polynomial whenever the
            # bit shifted out is set.
            for _ in range(8):
                low_bit_set = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit_set:
                    value ^= polynomial
            entries.append(value)
        return entries
    # Built once, at class-creation time, and shared by all instances.
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ch) & 0xff
        shifted = (crc >> 8) & 0xffffff
        return shifted ^ self.crctable[index]

    def __init__(self, pwd):
        # Fixed initial key state, then mixed with every password byte.
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for byte in pwd:
            self._UpdateKeys(byte)

    def _UpdateKeys(self, c):
        """Advance the three keys using the (plaintext) byte c."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (self.key0 & 0xFF)) & mask32
        self.key1 = (key1 * 0x08088405 + 1) & mask32
        self.key2 = self._crc32((self.key1 >> 24) & 0xFF, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        keystream_byte = ((k * (k ^ 1)) >> 8) & 255
        plain = c ^ keystream_byte
        self._UpdateKeys(plain)
        return plain
435
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap fileobj for reading the member described by zipinfo.

        fileobj -- binary file object to read the raw member data from.
        zipinfo -- object providing compress_type, compress_size and
                   filename for the member.
        decrypt -- optional callable mapping one encrypted byte (int) to
                   its decrypted value; None for unencrypted members.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0         # raw bytes read from fileobj so far
        self.rawbuffer = b''        # raw data not yet decrypted/decompressed
        self.readbuffer = b''       # decoded data not yet returned by read()
        self.linebuffer = b''       # data held back by readline()
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = (b"\n", )
        self.lastdiscard = b''      # last line separator removed by readline()

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed  = False
        self.mode    = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal-newline handling in readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = (b"\n", )
        if self.univ_newlines:
            self.nlSeps = (b"\r\n", b"\r", b"\n")

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next line, raising StopIteration at end of data."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object as closed (the underlying file is not touched)."""
        self.closed = True

    def _checkfornewline(self):
        """Look for a line separator in linebuffer.

        Returns (index, separator length), or (-1, -1) when none is found.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[:1]) == (b'\r', b'\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.
        """
        if size < 0:
            size = sys.maxsize
        elif size == 0:
            return b''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            nl = min(nl, size)
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = b''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + b"\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Read and return up to size bytes of decoded member data.

        With size None (the default) everything that can be read in this
        call is returned.  Returns b'' when size is 0.
        """
        # act like file obj and return empty string if size is 0
        if size == 0:
            return b''

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None and size >= 0:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            data = self.fileobj.read(bytesToRead)
            self.bytes_read += len(data)
            self.rawbuffer += data

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = b''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = bytes(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type == ZIP_DEFLATED:
                    newdata = self.dc.decompress(newdata)
                    self.rawbuffer = self.dc.unconsumed_tail
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            data = self.readbuffer
            self.readbuffer = b''
        else:
            data = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000633
634
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000635class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000636 """ Class with methods to open, read, write, close, list zip files.
637
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000638 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000639
Fred Drake3d9091e2001-03-26 15:49:24 +0000640 file: Either the path to the file, or a file-like object.
641 If it is a path, the file will be opened and closed by ZipFile.
642 mode: The mode can be either read "r", write "w" or append "a".
643 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000644 allowZip64: if True ZipFile will create files with ZIP64 extensions when
645 needed, otherwise it will raise an exception when this would
646 be necessary.
647
Fred Drake3d9091e2001-03-26 15:49:24 +0000648 """
Fred Drake484d7352000-10-02 21:14:52 +0000649
Fred Drake90eac282001-02-28 05:29:34 +0000650 fp = None # Set here since __del__ checks it
651
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    'file' is either a path (str) or an already-open file-like object.
    'compression' must be ZIP_STORED or ZIP_DEFLATED; the latter needs
    the zlib module.  Raises RuntimeError for an invalid mode or
    compression method.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    # Validate the compression method up front.
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression != ZIP_STORED:
        raise RuntimeError("That compression method is not supported")

    key = mode.replace('b', '')[0]
    self.mode = key
    self.compression = compression  # Method of compression
    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                  # Level of printing: 0 through 3
    self.NameToInfo = {}            # Find file info given name
    self.filelist = []              # List of ZipInfo instances for archive
    self.pwd = None
    self.comment = b''

    # A str is taken to be a filename; anything else is used as an
    # already-open file object that we must not close ourselves.
    if isinstance(file, str):
        self._filePassed = 0
        self.filename = file
        open_modes = {'r': 'rb', 'w': 'wb', 'a': 'r+b'}
        try:
            self.fp = io.open(file, open_modes[mode])
        except IOError:
            if mode != 'a':
                raise
            # Appending to a file that cannot be opened for update:
            # fall back to creating it.
            mode = key = 'w'
            self.fp = io.open(file, open_modes[mode])
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    if key == 'r':
        self._GetContents()
    elif key == 'w':
        pass
    elif key == 'a':
        try:
            # See if file is a zip file; if so, position at the start of
            # the central directory so new data overwrites it.
            self._RealGetContents()
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:
            # Not a zip file: just append at the end.
            self.fp.seek(0, 2)
    else:
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise RuntimeError('Mode must be "r", "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000711
712 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000713 """Read the directory, making sure we close the file if the format
714 is bad."""
715 try:
716 self._RealGetContents()
717 except BadZipfile:
718 if not self._filePassed:
719 self.fp.close()
720 self.fp = None
721 raise
722
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record, reads the central
    directory into memory and fills self.filelist / self.NameToInfo
    with one ZipInfo per entry.  Also sets self.comment and
    self.start_dir.

    Raises BadZipfile if no end record is found, if a directory entry
    has a bad signature, or if a directory entry is truncated.
    """
    fp = self.fp
    endrec = _EndRecData(fp)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from an in-memory copy.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # A short record would otherwise surface as struct.error.
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Index 5 is the general purpose flag field (it is the value
        # unpacked into x.flag_bits below).
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
            x.flag_bits, x.compress_type, t, d,
            x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        x._decodeExtra()
        # Shift by the size of any data prepended to the archive.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000791
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000792
def namelist(self):
    """Return a new list with the name of every archive member, in
    the order the members appear in self.filelist."""
    return [member.filename for member in self.filelist]
799
def infolist(self):
    """Return the list of ZipInfo instances describing the archive
    members.  This is the internal list itself, not a copy."""
    members = self.filelist
    return members
804
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header row is followed by one row per member (name,
    modification time, uncompressed size).  Output goes to 'file';
    None is passed straight through to print().
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000813
814 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000815 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000816 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000817 for zinfo in self.filelist:
818 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000819 # Read by chunks, to avoid an OverflowError or a
820 # MemoryError with very large embedded files.
821 f = self.open(zinfo.filename, "r")
822 while f.read(chunk_size): # Check CRC-32
823 pass
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000824 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000825 return zinfo.filename
826
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError when the archive has no member with that name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000835
def setpassword(self, pwd):
    """Set default password for encrypted files.

    'pwd' must be a bytes object; anything else raises TypeError.
    The check is an explicit raise rather than an assert so that it
    is still enforced when Python runs with -O.
    """
    if not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd
840
def read(self, name, pwd=None):
    """Return the complete contents of member 'name'.

    The member is opened through self.open() in mode "r" with the
    given password and read in one call.
    """
    member = self.open(name, "r", pwd)
    return member.read()
844
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    'name' is a member name or a ZipInfo instance.  'mode' must be
    "r", "U" or "rU"; a mode containing "U" turns on universal
    newlines for the returned object.  'pwd' overrides the default
    password set with setpassword() for encrypted members.

    Raises RuntimeError for a bad mode, a closed archive, a missing
    password or a wrong password, and BadZipfile when the local file
    header is invalid or its name disagrees with the directory.
    When this object opened the archive by filename, the extra file
    handle opened here is closed again if anything fails.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        # The directory name was decoded as UTF-8 when flag bit 0x800
        # is set and as cp437 otherwise, so decode the header name the
        # same way before comparing.  The plain UTF-8 byte comparison
        # is still accepted so that anything that matched before
        # keeps matching.
        name_encoding = "utf-8" if zinfo.flag_bits & 0x800 else "cp437"
        try:
            names_agree = fname.decode(name_encoding) == zinfo.orig_filename
        except UnicodeDecodeError:
            names_agree = False
        if not names_agree and fname != zinfo.orig_filename.encode("utf-8"):
            raise BadZipfile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption
            # header used to strengthen the algorithm. The first 11 bytes
            # are completely random, while the 12th contains the MSB of
            # the CRC, or the MSB of the file time depending on the
            # header type and is used to check the correctness of the
            # password.
            enc_header = zef_file.read(12)
            h = list(map(zd, enc_header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
        return zef
    except BaseException:
        # Do not leak the handle we opened ourselves; a caller-supplied
        # file object is left alone.
        if not self._filePassed:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000922
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the path it was written to.

    `member' may be a filename or a ZipInfo object.  The member is
    written under `path' using its full name; when `path' is None the
    current working directory is used.  `pwd' is passed on for
    encrypted members.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
936
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() on each.

    `members' defaults to everything returned by namelist(); when
    given it must be a subset of that list.  `path' and `pwd' are
    passed through to extract() unchanged.
    """
    targets = self.namelist() if members is None else members
    for item in targets:
        self.extract(item, path, pwd)
948
949 def _extract_member(self, member, targetpath, pwd):
950 """Extract the ZipInfo object 'member' to a physical
951 file on the path targetpath.
952 """
953 # build the destination pathname, replacing
954 # forward slashes to platform specific separators.
955 if targetpath[-1:] == "/":
956 targetpath = targetpath[:-1]
957
958 # don't include leading "/" from file name if present
959 if os.path.isabs(member.filename):
960 targetpath = os.path.join(targetpath, member.filename[1:])
961 else:
962 targetpath = os.path.join(targetpath, member.filename)
963
964 targetpath = os.path.normpath(targetpath)
965
966 # Create all upper directories if necessary.
967 upperdirs = os.path.dirname(targetpath)
968 if upperdirs and not os.path.exists(upperdirs):
969 os.makedirs(upperdirs)
970
Georg Brandlb533e262008-05-25 18:19:30 +0000971 source = self.open(member, pwd=pwd)
Christian Heimes790c8232008-01-07 21:14:23 +0000972 target = open(targetpath, "wb")
973 shutil.copyfileobj(source, target)
974 source.close()
975 target.close()
976
977 return targetpath
978
def _writecheck(self, zinfo):
    """Validate 'zinfo' and the archive state before a member is written.

    Raises RuntimeError when the archive is not writable, is closed, or
    the compression method is unusable, and LargeZipFile when the member
    would need ZIP64 extensions that are not enabled.  A duplicate name
    is only reported (when self.debug is set), never rejected.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("Duplicate name:", zinfo.filename)

    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")

    method = zinfo.compress_type
    if method == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if method not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError("That compression method is not supported")

    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile(
            "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001001
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    'arcname' defaults to 'filename'; any drive letter and leading
    path separators are removed from it.  'compress_type' overrides
    the archive's default compression for this member.

    Raises RuntimeError if the archive is closed (plus whatever
    _writecheck() raises).  The source file is always closed, even
    when reading or writing fails part-way.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True
    fp = io.open(filename, "rb")
    try:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None
        # Stream the file in 8 KiB pieces, updating size and CRC as we go.
        while True:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    finally:
        # Release the source file on every path, not only on success.
        fp.close()
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1070
def writestr(self, zinfo_or_arcname, data):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo is created with the current
    local time, the archive's default compression and external
    attributes of 0o600 << 16.  Raises RuntimeError if the archive
    is closed (plus whatever _writecheck() raises).
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)            # Uncompressed size
    # Set before _writecheck(), which inspects header_offset.
    zinfo.header_offset = self.fp.tell()    # Start of header data
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff       # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
        data = co.compress(data) + co.flush()
        zinfo.compress_size = len(data)    # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data.  The CRC was
        # masked to an unsigned 32-bit value above, so it must be
        # packed as unsigned ("L"); signed "l" rejects values >= 2**31.
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1113
1114 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001115 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001116 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001117
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing if the archive is already closed.  When the archive
    was modified, the central directory, the optional ZIP64 end
    records and the end-of-archive record (with the archive comment)
    are written first.  self.fp is always reset to None, and a file
    opened by this object is always closed, even if writing the
    ending records fails.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:  # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count += 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                # Values too large for the 32-bit fields go into a ZIP64
                # extra field; the fixed field then holds 0xffffffff.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q' * len(extra),
                        1, 8 * len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the values that failed to pack, then re-raise.
                    print((structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset), file=sys.stderr)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirOffset = pos1
            if pos1 > ZIP64_LIMIT:
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                centDirOffset = 0xFFFFFFFF

            # check for valid comment length
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    msg = 'Archive comment is too long; truncating to %d bytes' \
                          % ZIP_MAX_COMMENT
                    print(msg)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, count % ZIP_FILECOUNT_LIMIT,
                                 count % ZIP_FILECOUNT_LIMIT, pos2 - pos1,
                                 centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Mark the archive closed first so that a later close() (for
        # example from __del__) is a no-op, then release our own file.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1221
1222
1223class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001224 """Class to create ZIP archives with Python library files and packages."""
1225
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001226 def writepy(self, pathname, basename = ""):
1227 """Add all files from "pathname" to the ZIP archive.
1228
Fred Drake484d7352000-10-02 21:14:52 +00001229 If pathname is a package directory, search the directory and
1230 all package subdirectories recursively for all *.py and enter
1231 the modules into the archive. If pathname is a plain
1232 directory, listdir *.py and enter all modules. Else, pathname
1233 must be a Python *.py file and the module will be put into the
1234 archive. Added modules are always module.pyo or module.pyc.
1235 This method will compile the module.py into module.pyc if
1236 necessary.
1237 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001238 dir, name = os.path.split(pathname)
1239 if os.path.isdir(pathname):
1240 initname = os.path.join(pathname, "__init__.py")
1241 if os.path.isfile(initname):
1242 # This is a package directory, add it
1243 if basename:
1244 basename = "%s/%s" % (basename, name)
1245 else:
1246 basename = name
1247 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001248 print("Adding package in", pathname, "as", basename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001249 fname, arcname = self._get_codename(initname[0:-3], basename)
1250 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001251 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001252 self.write(fname, arcname)
1253 dirlist = os.listdir(pathname)
1254 dirlist.remove("__init__.py")
1255 # Add all *.py files and package subdirectories
1256 for filename in dirlist:
1257 path = os.path.join(pathname, filename)
1258 root, ext = os.path.splitext(filename)
1259 if os.path.isdir(path):
1260 if os.path.isfile(os.path.join(path, "__init__.py")):
1261 # This is a package directory, add it
1262 self.writepy(path, basename) # Recursive call
1263 elif ext == ".py":
1264 fname, arcname = self._get_codename(path[0:-3],
1265 basename)
1266 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001267 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001268 self.write(fname, arcname)
1269 else:
1270 # This is NOT a package directory, add its files at top level
1271 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001272 print("Adding files from directory", pathname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001273 for filename in os.listdir(pathname):
1274 path = os.path.join(pathname, filename)
1275 root, ext = os.path.splitext(filename)
1276 if ext == ".py":
1277 fname, arcname = self._get_codename(path[0:-3],
1278 basename)
1279 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001280 print("Adding", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001281 self.write(fname, arcname)
1282 else:
1283 if pathname[-3:] != ".py":
Collin Winterce36ad82007-08-30 01:19:48 +00001284 raise RuntimeError(
1285 'Files added with writepy() must end with ".py"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001286 fname, arcname = self._get_codename(pathname[0:-3], basename)
1287 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001288 print("Adding file", arcname)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001289 self.write(fname, arcname)
1290
def _get_codename(self, pathname, basename):
    """Return (filename, archivename) for the path.

    pathname is a module path without its ".py" suffix.  The file
    chosen for archiving is, in order of preference: the ".pyo" file
    if it exists and is at least as new as the source, otherwise the
    ".pyc" file, which is (re)compiled from the source first when it
    is missing or older than the source.  A compile error is printed
    and the ".pyc" path is still returned.

    archivename is the chosen file's name, prefixed with "basename/"
    when basename is non-empty.  For example, given
    /python/lib/string, return (/python/lib/string.pyc, string.pyc).
    """
    source = pathname + ".py"
    compiled = pathname + ".pyc"
    optimized = pathname + ".pyo"

    def mtime(path):
        # Modification time of path, as reported by os.stat().
        return os.stat(path).st_mtime

    if os.path.isfile(optimized) and mtime(optimized) >= mtime(source):
        # An up-to-date .pyo file takes precedence.
        chosen = optimized
    else:
        chosen = compiled
        pyc_is_current = (os.path.isfile(compiled)
                          and mtime(compiled) >= mtime(source))
        if not pyc_is_current:
            import py_compile
            if self.debug:
                print("Compiling", source)
            try:
                py_compile.compile(source, compiled, None, True)
            except py_compile.PyCompileError as err:
                print(err.msg)

    archivename = os.path.basename(chosen)
    if basename:
        archivename = "%s/%s" % (basename, archivename)
    return (chosen, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001320
1321
def main(args=None):
    """Run the zipfile command-line interface.

    args is the argument list without the program name; when it is
    None, sys.argv[1:] is used.  The first argument selects the action:

        -l zipfile.zip          list the archive's contents
        -t zipfile.zip          test the archive's members
        -e zipfile.zip target   extract the archive into directory target
        -c zipfile.zip src ...  create the archive from files/directories

    An unknown action or a wrong number of arguments prints the usage
    text and exits with status 1 (SystemExit).  In every action the
    archive is closed even if the operation raises.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            # Previously the archive was left open in this branch.
            zf.close()
        if badfile:
            # testzip() reports the first bad member; do not hide it.
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                # NOTE(review): member names are joined onto the target
                # directory as-is, so absolute names or ".." components
                # can land outside of it; only extract trusted archives.
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                # tgtdir is empty when the target is a bare file name in
                # the current directory; os.makedirs('') would fail.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                if path.endswith('/'):
                    # Directory entry: the directory now exists and
                    # there is no file content to write.
                    continue
                with io.open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively every file below a directory;
            # anything else (e.g. a missing path) is silently ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1394
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()