blob: f70cf63d7e7915b0cd2e22bf12466c742d3d9418 [file] [log] [blame]
"""
Read and write ZIP files.

XXX references to utf-8 need further investigation.
"""
Christian Heimes790c8232008-01-07 21:14:23 +00006import struct, os, time, sys, shutil
Martin v. Löwis59e47792009-01-24 14:10:07 +00007import binascii, io, stat
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
try:
    import zlib # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    # zlib is optional.  Without it ZIP_DEFLATED cannot be used, and the
    # CRC-32 routine falls back to the binascii implementation.
    zlib = None
    crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
# Names exported by "from zipfile import *".
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
class BadZipfile(Exception):
    """Raised when a ZIP archive cannot be read (bad or unsupported format)."""


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """

error = BadZipfile      # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000030
# Sizes above ZIP64_LIMIT make ZipInfo.FileHeader() emit a ZIP64 extra field.
ZIP64_LIMIT = (1 << 31) - 1
# NOTE(review): presumably the entry count / archive comment length beyond
# which the classic (non-ZIP64) end record cannot be used -- confirm at the
# call sites, which are outside this part of the file.
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
39
Martin v. Löwisb09b8442008-07-03 14:13:42 +000040# Below are some formats and associated data for reading/writing headers using
41# the struct module. The names and structures of headers/records are those used
42# in the PKWARE description of the ZIP file format:
43# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
44# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000045
Martin v. Löwisb09b8442008-07-03 14:13:42 +000046# The "end of central directory" structure, magic number, size, and indices
47# (section V.I in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +000048structEndArchive = b"<4s4H2LH"
49stringEndArchive = b"PK\005\006"
50sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwisb09b8442008-07-03 14:13:42 +000051
52_ECD_SIGNATURE = 0
53_ECD_DISK_NUMBER = 1
54_ECD_DISK_START = 2
55_ECD_ENTRIES_THIS_DISK = 3
56_ECD_ENTRIES_TOTAL = 4
57_ECD_SIZE = 5
58_ECD_OFFSET = 6
59_ECD_COMMENT_SIZE = 7
60# These last two indices are not part of the structure as defined in the
61# spec, but they are used internally by this module as a convenience
62_ECD_COMMENT = 8
63_ECD_LOCATION = 9
64
65# The "central directory" structure, magic number, size, and indices
66# of entries in the structure (section V.F in the format document)
67structCentralDir = "<4s4B4HL2L5H2L"
Georg Brandl2ee470f2008-07-16 12:55:28 +000068stringCentralDir = b"PK\001\002"
Martin v. Löwisb09b8442008-07-03 14:13:42 +000069sizeCentralDir = struct.calcsize(structCentralDir)
70
Fred Drake3e038e52001-02-28 17:56:26 +000071# indexes of entries in the central directory structure
72_CD_SIGNATURE = 0
73_CD_CREATE_VERSION = 1
74_CD_CREATE_SYSTEM = 2
75_CD_EXTRACT_VERSION = 3
Martin v. Löwisb09b8442008-07-03 14:13:42 +000076_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +000077_CD_FLAG_BITS = 5
78_CD_COMPRESS_TYPE = 6
79_CD_TIME = 7
80_CD_DATE = 8
81_CD_CRC = 9
82_CD_COMPRESSED_SIZE = 10
83_CD_UNCOMPRESSED_SIZE = 11
84_CD_FILENAME_LENGTH = 12
85_CD_EXTRA_FIELD_LENGTH = 13
86_CD_COMMENT_LENGTH = 14
87_CD_DISK_NUMBER_START = 15
88_CD_INTERNAL_FILE_ATTRIBUTES = 16
89_CD_EXTERNAL_FILE_ATTRIBUTES = 17
90_CD_LOCAL_HEADER_OFFSET = 18
91
Martin v. Löwisb09b8442008-07-03 14:13:42 +000092# The "local file header" structure, magic number, size, and indices
93# (section V.A in the format document)
94structFileHeader = "<4s2B4HL2L2H"
Georg Brandl2ee470f2008-07-16 12:55:28 +000095stringFileHeader = b"PK\003\004"
Martin v. Löwisb09b8442008-07-03 14:13:42 +000096sizeFileHeader = struct.calcsize(structFileHeader)
97
Fred Drake3e038e52001-02-28 17:56:26 +000098_FH_SIGNATURE = 0
99_FH_EXTRACT_VERSION = 1
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000100_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000101_FH_GENERAL_PURPOSE_FLAG_BITS = 3
102_FH_COMPRESSION_METHOD = 4
103_FH_LAST_MOD_TIME = 5
104_FH_LAST_MOD_DATE = 6
105_FH_CRC = 7
106_FH_COMPRESSED_SIZE = 8
107_FH_UNCOMPRESSED_SIZE = 9
108_FH_FILENAME_LENGTH = 10
109_FH_EXTRA_FIELD_LENGTH = 11
110
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000111# The "Zip64 end of central directory locator" structure, magic number, and size
Georg Brandl2ee470f2008-07-16 12:55:28 +0000112structEndArchive64Locator = "<4sLQL"
113stringEndArchive64Locator = b"PK\x06\x07"
114sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000115
116# The "Zip64 end of central directory" record, magic number, size, and indices
117# (section V.G in the format document)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000118structEndArchive64 = "<4sQ2H2L4Q"
119stringEndArchive64 = b"PK\x06\x06"
120sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000121
122_CD64_SIGNATURE = 0
123_CD64_DIRECTORY_RECSIZE = 1
124_CD64_CREATE_VERSION = 2
125_CD64_EXTRACT_VERSION = 3
126_CD64_DISK_NUMBER = 4
127_CD64_DISK_NUMBER_START = 5
128_CD64_NUMBER_ENTRIES_THIS_DISK = 6
129_CD64_NUMBER_ENTRIES_TOTAL = 7
130_CD64_DIRECTORY_SIZE = 8
131_CD64_OFFSET_START_CENTDIR = 9
132
def _check_zipfile(fp):
    """Return True if an "end of central directory" record is found in fp.

    fp must be a seekable binary file object.  An IOError raised while
    probing the file is treated as "not a ZIP file" and yields False.
    """
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except IOError:
        pass
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000140
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.

    Returns False (rather than raising) when the file cannot be opened or
    read.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            # Already an open file-like object: probe it directly and leave
            # closing it to the caller.
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except IOError:
        pass
    return result
156
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is a seekable binary file object.  offset is the position of the
    classic "end of central directory" record relative to the end of the
    file (so it is zero or negative).  endrec is the list built by
    _EndRecData(); it is updated in place and returned.

    endrec is returned unchanged when no ZIP64 locator/record precedes the
    classic record.  BadZipfile is raised for multi-disk archives.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator here.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated record; keep the values from the classic end record.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
188
189
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the eight fields of the ZIP "End of central dir"
    record, followed by the archive comment (bytes) and, as a tenth item,
    the file seek offset of this record.  fpin must be a seekable binary
    file object.  The list may have been updated from ZIP64 records by
    _EndRecData64()."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check protects struct.unpack() from file objects that clamp
    # an out-of-range seek instead of raising.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record to follow it.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000246
Fred Drake484d7352000-10-02 21:14:52 +0000247
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for filename, last modified at date_time.

        date_time is a (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a bytes object.

        The result is the packed local file header followed by the encoded
        file name and the extra field.  When a size exceeds ZIP64_LIMIT a
        ZIP64 extra field is appended and extract_version/create_version
        are raised to at least 45 on this object.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version on its own value; it must not be
            # overwritten with extract_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        ASCII names are written as-is; any other name is encoded as UTF-8
        and flag bit 0x800 is added to the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 records found in self.extra to this object.

        Each record is a 4-byte (type, length) header followed by length
        bytes of data.  For type 1 (ZIP64), the 64-bit values replace
        file_size, compress_size and header_offset, in that order, for each
        field currently holding its "see ZIP64 record" marker.  Raises
        RuntimeError for a ZIP64 record of unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Stop once fewer than 4 bytes remain: that cannot be a record header
        # and unpacking it would raise struct.error.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000386
387
Thomas Wouterscf297e42007-02-23 15:07:44 +0000388class _ZipDecrypter:
389 """Class to handle decryption of files stored within a ZIP archive.
390
391 ZIP supports a password-based form of encryption. Even though known
392 plaintext attacks have been found against it, it is still useful
Christian Heimesfdab48e2008-01-20 09:06:41 +0000393 to be able to get data out of such a file.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000394
395 Usage:
396 zd = _ZipDecrypter(mypwd)
397 plain_char = zd(cypher_char)
398 plain_text = map(zd, cypher_text)
399 """
400
401 def _GenerateCRCTable():
402 """Generate a CRC-32 table.
403
404 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
405 internal keys. We noticed that a direct implementation is faster than
406 relying on binascii.crc32().
407 """
408 poly = 0xedb88320
409 table = [0] * 256
410 for i in range(256):
411 crc = i
412 for j in range(8):
413 if crc & 1:
414 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
415 else:
416 crc = ((crc >> 1) & 0x7FFFFFFF)
417 table[i] = crc
418 return table
419 crctable = _GenerateCRCTable()
420
421 def _crc32(self, ch, crc):
422 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000423 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000424
425 def __init__(self, pwd):
426 self.key0 = 305419896
427 self.key1 = 591751049
428 self.key2 = 878082192
429 for p in pwd:
430 self._UpdateKeys(p)
431
432 def _UpdateKeys(self, c):
433 self.key0 = self._crc32(c, self.key0)
434 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
435 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000436 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000437
438 def __call__(self, c):
439 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000440 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000441 k = self.key2 | 2
442 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000443 self._UpdateKeys(c)
444 return c
445
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap fileobj, which supplies the member's raw (stored) bytes.

        zipinfo provides compress_type, compress_size and filename.
        decrypt, when given, is a callable mapping one encrypted byte value
        to its decrypted value.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = b''        # read from fileobj, not yet processed
        self.readbuffer = b''       # processed data not yet returned
        self.linebuffer = b''       # data held back by readline()
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = (b"\n", )
        self.lastdiscard = b''      # last line separator dropped by readline()

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed  = False
        self.mode    = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal-newline handling in readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = (b"\n", )
        if self.univ_newlines:
            self.nlSeps = (b"\r\n", b"\r", b"\n")

    def __iter__(self):
        return self

    def __next__(self):
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark this object as closed."""
        self.closed = True

    def _checkfornewline(self):
        """Return (index, length) of a separator in linebuffer, or (-1, -1)."""
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[:1]) == (b'\r', b'\n'):
                self.linebuffer = self.linebuffer[1:]

            # NOTE(review): separators are tried in nlSeps order, so the
            # match returned is not necessarily the earliest one in the
            # buffer.
            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.
        """
        if size < 0:
            size = sys.maxsize
        elif size == 0:
            return b''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            if nl > size:
                # The buffered line is longer than requested: hand back a
                # plain prefix.  No separator is consumed and no newline is
                # appended, so the rest of the line stays intact.
                chunk = self.linebuffer[:size]
                self.linebuffer = self.linebuffer[size:]
                self.lastdiscard = b''
                return chunk
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = b''
                # What was just returned did not end with a separator, so a
                # later leading "\n" must not be taken for the second half
                # of an old "\r\n" pair.
                self.lastdiscard = b''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + b"\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to size bytes of member data as bytes.

        With size None or negative, everything that can currently be
        produced is returned.
        """
        # act like file obj and return empty string if size is 0
        if size == 0:
            return b''
        if size is not None and size < 0:
            # A negative size means "no limit"; normalising it here keeps it
            # away from the slicing at the end of this method.
            size = None

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12
        # Total number of bytes this object expects to pull from fileobj.
        payload = bytesToRead + self.bytes_read

        if size is not None:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            elif self.compress_type == ZIP_DEFLATED:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            data = self.fileobj.read(bytesToRead)
            self.bytes_read += len(data)
            self.rawbuffer += data
            if self.bytes_read >= payload:
                # Everything belonging to this member has been fetched; this
                # lets the decompressor be flushed below.
                self.eof = True

        # handle contents of raw buffer
        if self.rawbuffer:
            newdata = self.rawbuffer
            self.rawbuffer = b''

            # decrypt new data if we were given an object to handle that
            if newdata and self.decrypter is not None:
                newdata = bytes(map(self.decrypter, newdata))

            # decompress newly read data if necessary
            if newdata and self.compress_type == ZIP_DEFLATED:
                newdata = self.dc.decompress(newdata)
                self.rawbuffer = self.dc.unconsumed_tail
                if self.eof and len(self.rawbuffer) == 0:
                    # we're out of raw bytes (both from the file and
                    # the local buffer); flush just to make sure the
                    # decompressor is done
                    newdata += self.dc.flush()
                    # prevent decompressor from being used again
                    self.dc = None

            self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            data = self.readbuffer
            self.readbuffer = b''
        else:
            data = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000643
644
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000645class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000646 """ Class with methods to open, read, write, close, list zip files.
647
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000648 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000649
Fred Drake3d9091e2001-03-26 15:49:24 +0000650 file: Either the path to the file, or a file-like object.
651 If it is a path, the file will be opened and closed by ZipFile.
652 mode: The mode can be either read "r", write "w" or append "a".
653 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000654 allowZip64: if True ZipFile will create files with ZIP64 extensions when
655 needed, otherwise it will raise an exception when this would
656 be necessary.
657
Fred Drake3d9091e2001-03-26 15:49:24 +0000658 """
Fred Drake484d7352000-10-02 21:14:52 +0000659
Fred Drake90eac282001-02-28 05:29:34 +0000660 fp = None # Set here since __del__ checks it
661
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file        -- a path (str) or an already open file-like object.
    mode        -- "r", "w" or "a"; anything else raises RuntimeError.
    compression -- ZIP_STORED or ZIP_DEFLATED (the latter needs zlib);
                   anything else raises RuntimeError.
    allowZip64  -- stored for the write-side checks that decide whether
                   ZIP64 extensions may be used.

    In append mode a missing file is created, and a file that is not a
    ZIP archive gets the archive appended after its existing content.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    else:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                  # Level of printing: 0 through 3
    self.NameToInfo = {}            # Find file info given name
    self.filelist = []              # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self.comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        modeDict = {'r': 'rb', 'w': 'wb', 'a': 'r+b'}
        try:
            self.fp = io.open(file, modeDict[mode])
        except IOError:
            if mode == 'a':
                # Nothing to append to yet: fall back to creating the file.
                mode = key = 'w'
                self.fp = io.open(file, modeDict[mode])
            else:
                raise
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    # `mode` was validated above, so `key` is always one of r/w/a here.
    if key == 'r':
        self._GetContents()
    elif key == 'w':
        # Mark the archive as modified so close() writes the end records
        # even when no member is added; otherwise an empty, invalid file
        # would be left behind.
        self._didModify = True
    else:
        try:
            # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:
            # file is not a zip file, just append
            self.fp.seek(0, 2)
            # As for "w": make sure close() emits the archive structures
            # even if nothing is written.
            self._didModify = True
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000721
def __enter__(self):
    """Enter a ``with`` block; the archive itself is the managed object."""
    archive = self
    return archive
724
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    The exception details are ignored and nothing is returned, so an
    exception raised inside the block keeps propagating.
    """
    self.close()
727
def _GetContents(self):
    """Load the archive directory via _RealGetContents().

    If the data turns out not to be a valid archive, a file that this
    object opened itself is closed before BadZipfile is re-raised; a
    caller-supplied file object is left alone.
    """
    try:
        self._RealGetContents()
    except BadZipfile:
        we_opened_it = not self._filePassed
        if we_opened_it:
            self.fp.close()
            self.fp = None
        raise
738
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-archive record, reads the whole central directory
    into memory and creates one ZipInfo per entry, filling self.filelist,
    self.NameToInfo, self.comment and self.start_dir.

    Raises BadZipfile when no end record is found or when a central
    directory entry is malformed or truncated.
    """
    fp = self.fp
    endrec = _EndRecData(fp)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir: Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory rather than from the archive file.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # A short record would otherwise surface as struct.error,
            # which callers handling BadZipfile do not expect.
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        x._decodeExtra()
        # Offsets in the directory ignore any data prepended to the archive.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000807
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000808
def namelist(self):
    """Return a new list with the name of every archive member, in the
    order the members appear in self.filelist."""
    return [entry.filename for entry in self.filelist]
815
def infolist(self):
    """Return the list of ZipInfo instances describing the archive members.

    This is the archive's own list object, not a copy.
    """
    members = self.filelist
    return members
820
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header line is followed by one line per member giving its name,
    modification time and uncompressed size.  Output goes to `file`, or
    to print()'s default stream when `file` is None.
    """
    heading = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(heading, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000829
def testzip(self):
    """Read all the files and check the CRC.

    Returns the name of the first member whose reading raises BadZipfile,
    or None when every member could be read completely.
    """
    chunk_size = 2 ** 20
    for zinfo in self.filelist:
        try:
            # Read by chunks, to avoid an OverflowError or a
            # MemoryError with very large embedded files.
            f = self.open(zinfo.filename, "r")
            try:
                while f.read(chunk_size):     # Check CRC-32
                    pass
            finally:
                # Release the member explicitly instead of leaving one
                # unclosed object behind per archive entry.
                f.close()
        except BadZipfile:
            return zinfo.filename
    return None
842
def getinfo(self, name):
    """Return the ZipInfo instance registered under `name`.

    Raises KeyError when the archive has no member of that name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000851
def setpassword(self, pwd):
    """Set default password for encrypted files.

    `pwd` must be a bytes object, or None to clear the default.  Any
    other type raises TypeError.
    """
    # An explicit check is used because `assert` disappears under -O,
    # which would let a str password through and fail later on.
    if pwd is not None and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd
856
def read(self, name, pwd=None):
    """Return the complete content of member `name`.

    The member is opened in mode "r" with the optional password `pwd`
    and read in a single call.
    """
    member = self.open(name, "r", pwd)
    return member.read()
860
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name -- a member name or a ZipInfo instance.
    mode -- "r", "U" or "rU"; a mode containing "U" enables universal
            newlines on the returned object.
    pwd  -- password for an encrypted member; falls back to the default
            set on the archive.

    Raises RuntimeError for a bad mode, a closed archive, or a missing or
    wrong password, and BadZipfile when the member's local header does
    not match the central directory.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Resolve the member first so an unknown name fails before any file
    # is opened.
    if isinstance(name, ZipInfo):
        # 'name' is already an info object
        zinfo = name
    else:
        # Get info object for name
        zinfo = self.getinfo(name)

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename.encode("utf-8"):
            # Names without the UTF-8 flag (0x800) are decoded as cp437
            # when the directory is read, so compare such names in that
            # encoding before declaring a mismatch.
            if (zinfo.flag_bits & 0x800
                    or fname.decode("cp437") != zinfo.orig_filename):
                raise BadZipfile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            # used to strengthen the algorithm. The first 11 bytes are
            # completely random, while the 12th contains the MSB of the CRC,
            # or the MSB of the file time depending on the header type
            # and is used to check the correctness of the password.
            crypt_header = zef_file.read(12)
            h = list(map(zd, crypt_header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
        return zef
    except BaseException:
        # Do not leak the handle opened above when the member cannot be
        # handed out; a caller-supplied file object is never closed here.
        if not self._filePassed:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000938
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the path it was written to.

    `member` is a member name or a ZipInfo object.  The member is placed
    under `path`, or under the current working directory when `path` is
    None.  `pwd` is the password passed on for encrypted members.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
952
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() on each of them.

    `members` defaults to everything returned by namelist(); `path` and
    `pwd` are handed to extract() unchanged.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
964
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
    file on the path targetpath.

    Returns the normalized path that was created.  A member whose name
    ends in "/" is created as a directory; anything else is copied from
    the archive into a file, creating parent directories as needed.
    """
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # Member names come from the archive and cannot be trusted: convert
    # them to platform separators, then drop any drive, leading
    # separators, "." and ".." components so the result always stays
    # below targetpath.
    arcname = member.filename.replace('/', os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    arcname = os.path.splitdrive(arcname)[1]
    unsafe_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(part for part in arcname.split(os.path.sep)
                               if part not in unsafe_parts)

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    if member.filename.endswith('/'):
        # Directory entry: nothing to copy.
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    source = self.open(member, pwd=pwd)
    try:
        # Both ends are closed even when the copy fails part-way.
        with open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)
    finally:
        source.close()

    return targetpath
1001
def _writecheck(self, zinfo):
    """Validate that `zinfo` may be written to this archive.

    A duplicate member name is only reported (when debugging is on).
    RuntimeError is raised for a non-writable mode, a closed archive or
    an unusable compression method; LargeZipFile is raised when the
    member size or header offset exceeds ZIP64_LIMIT while ZIP64 is not
    allowed.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("Duplicate name:", zinfo.filename)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    method = zinfo.compress_type
    if method == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if method not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError("That compression method is not supported")
    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile(
            "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001024
def write(self, filename, arcname=None, compress_type=None):
    """Add the file or directory at `filename` to the archive.

    arcname       -- name to store; defaults to `filename`.  The drive
                     and leading separators are removed, and "/" is
                     appended for a directory.
    compress_type -- overrides the archive's default compression method
                     for this member.

    Raises RuntimeError when the archive is already closed; further
    errors come from _writecheck().
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    is_directory = stat.S_ISDIR(st.st_mode)
    date_time = time.localtime(st.st_mtime)[0:6]

    # Work out the name stored in the archive.
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if is_directory:
        arcname = arcname + '/'

    # Create ZipInfo instance to store file information
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)
    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if is_directory:
        # A directory is recorded as an empty member.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    with open(filename, "rb") as src:
        # The header goes out with zeroed CRC and sizes; the real values
        # are patched in once the data has been streamed.
        zinfo.CRC = checksum = 0
        zinfo.compress_size = packed_total = 0
        zinfo.file_size = raw_total = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            deflater = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                        zlib.DEFLATED, -15)
        else:
            deflater = None
        while True:
            chunk = src.read(1024 * 8)
            if not chunk:
                break
            raw_total += len(chunk)
            checksum = crc32(chunk, checksum) & 0xffffffff
            if deflater:
                chunk = deflater.compress(chunk)
                packed_total += len(chunk)
            self.fp.write(chunk)
    if deflater:
        tail = deflater.flush()
        packed_total += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed_total
    else:
        zinfo.compress_size = raw_total
    zinfo.CRC = checksum
    zinfo.file_size = raw_total

    # Seek backwards and write CRC and file sizes
    resume_at = self.fp.tell()      # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(resume_at, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1105
def writestr(self, zinfo_or_arcname, data):
    """Store `data` as a member of the archive.

    `data` may be bytes or str; a str is encoded as UTF-8 first.
    `zinfo_or_arcname` is either a ZipInfo instance, used as given, or a
    member name, for which a ZipInfo is created with the current local
    time and the archive's default compression method.

    Raises RuntimeError when the archive is already closed; further
    errors come from _writecheck().
    """
    if isinstance(data, str):
        data = data.encode("utf-8")

    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)             # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header data
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum

    if zinfo.compress_type == ZIP_DEFLATED:
        deflater = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        data = deflater.compress(data) + deflater.flush()
        zinfo.compress_size = len(data)     # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size

    zinfo.header_offset = self.fp.tell()    # Start of header data
    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        trailer = struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size)
        self.fp.write(trailer)

    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1148
def __del__(self):
    """Finalizer: close the archive in case close() was never called."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001152
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Calling close() on an already closed archive does nothing.  When the
    archive was modified, the central directory, the ZIP64 end records
    (if the limits require them) and the end-of-archive record with the
    archive comment are written first.  The underlying file is closed
    only if this object opened it, and self.fp is reset even when
    writing the records fails.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:  # write ending records
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                # Values too large for the 32-bit fields are moved into
                # a ZIP64 extra field and replaced by 0xffffffff.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q' * len(extra),
                        1, 8 * len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(
                        structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the offending values before re-raising.
                    print((structCentralDir, stringCentralDir, create_version,
                           zinfo.create_system, extract_version, zinfo.reserved,
                           zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                           zinfo.CRC, compress_size, file_size,
                           len(zinfo.filename), len(extra_data), len(zinfo.comment),
                           0, zinfo.internal_attr, zinfo.external_attr,
                           header_offset), file=sys.stderr)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = len(self.filelist)
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # The classic end record can only hold the capped values.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    # Report the truncation; the message used to be
                    # built and then discarded.
                    print('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Always release the handle, even if writing the records failed,
        # so a later close() or __del__ does not retry on a broken file.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1261
1262
1263class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001264 """Class to create ZIP archives with Python library files and packages."""
1265
def writepy(self, pathname, basename=""):
    """Add the Python module(s) found at `pathname` to the archive.

    - A single file must end in ".py" (otherwise RuntimeError is raised);
      the file returned by _get_codename() for it is added.
    - A directory containing "__init__.py" is a package: its modules are
      added under `basename`/<directory name>, and sub-directories that
      are packages themselves are processed recursively.
    - Any other directory contributes the *.py files directly inside it,
      stored under `basename`.
    """
    leaf = os.path.split(pathname)[1]

    if not os.path.isdir(pathname):
        # A single module file.
        if pathname[-3:] != ".py":
            raise RuntimeError(
                'Files added with writepy() must end with ".py"')
        fname, arcname = self._get_codename(pathname[0:-3], basename)
        if self.debug:
            print("Adding file", arcname)
        self.write(fname, arcname)
        return

    initname = os.path.join(pathname, "__init__.py")
    if not os.path.isfile(initname):
        # This is NOT a package directory, add its files at top level
        if self.debug:
            print("Adding files from directory", pathname)
        for entry in os.listdir(pathname):
            if os.path.splitext(entry)[1] != ".py":
                continue
            module_path = os.path.join(pathname, entry)
            fname, arcname = self._get_codename(module_path[0:-3], basename)
            if self.debug:
                print("Adding", arcname)
            self.write(fname, arcname)
        return

    # This is a package directory, add it
    basename = "%s/%s" % (basename, leaf) if basename else leaf
    if self.debug:
        print("Adding package in", pathname, "as", basename)
    fname, arcname = self._get_codename(initname[0:-3], basename)
    if self.debug:
        print("Adding", arcname)
    self.write(fname, arcname)

    # Add all *.py files and package subdirectories
    entries = os.listdir(pathname)
    entries.remove("__init__.py")       # already written above
    for entry in entries:
        child = os.path.join(pathname, entry)
        suffix = os.path.splitext(entry)[1]
        if os.path.isdir(child):
            if os.path.isfile(os.path.join(child, "__init__.py")):
                # This is a package directory, add it
                self.writepy(child, basename)  # Recursive call
        elif suffix == ".py":
            fname, arcname = self._get_codename(child[0:-3], basename)
            if self.debug:
                print("Adding", arcname)
            self.write(fname, arcname)
1330
1331 def _get_codename(self, pathname, basename):
1332 """Return (filename, archivename) for the path.
1333
Fred Drake484d7352000-10-02 21:14:52 +00001334 Given a module name path, return the correct file path and
1335 archive name, compiling if necessary. For example, given
1336 /python/lib/string, return (/python/lib/string.pyc, string).
1337 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001338 file_py = pathname + ".py"
1339 file_pyc = pathname + ".pyc"
1340 file_pyo = pathname + ".pyo"
1341 if os.path.isfile(file_pyo) and \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001342 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
Tim Peterse1190062001-01-15 03:34:38 +00001343 fname = file_pyo # Use .pyo file
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001344 elif not os.path.isfile(file_pyc) or \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001345 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
Fred Drake484d7352000-10-02 21:14:52 +00001346 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001347 if self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001348 print("Compiling", file_py)
Martin v. Löwis0c6774d2003-01-15 11:51:06 +00001349 try:
1350 py_compile.compile(file_py, file_pyc, None, True)
Guido van Rossumb940e112007-01-10 16:19:56 +00001351 except py_compile.PyCompileError as err:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001352 print(err.msg)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001353 fname = file_pyc
1354 else:
1355 fname = file_pyc
1356 archivename = os.path.split(fname)[1]
1357 if basename:
1358 archivename = "%s/%s" % (basename, archivename)
1359 return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001360
1361
def main(args=None):
    """Run the command-line interface of this module.

    args is the list of command-line arguments without the program name;
    when it is None, sys.argv[1:] is used.  The first argument selects
    the action:

        -l zipfile.zip          list the archive contents
        -t zipfile.zip          test the archive and report a bad member
        -e zipfile.zip target   extract the archive into directory target
        -c zipfile.zip src ...  create an archive from files/directories

    On an unknown action or a wrong number of arguments the usage text is
    printed and sys.exit(1) is called.  The archive is always closed,
    even when an action fails part-way through.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            # testzip() returns the name of the first bad member, or None.
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % badfile)
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        out = args[2]
        try:
            for path in zf.namelist():
                # NOTE(review): member names are joined onto the target
                # as-is; names containing ".." or absolute paths are not
                # filtered, so only extract archives you trust.
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # Directory entry: create the directory instead of
                    # trying to open it as a file for writing.
                    if tgt and not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is empty when the member lands in the current
                # directory; there is nothing to create in that case.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Store a file as zippath, or recurse into a directory and
            # store its entries below zippath.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                zippath = os.path.basename(src)
                if not zippath:
                    # "dir/" has an empty basename; use the directory's
                    # own name so its files keep their prefix.
                    zippath = os.path.basename(os.path.dirname(src))
                addToZip(zf, src, zippath)
        finally:
            zf.close()
1433
# Run the command-line interface when this file is executed as a script;
# main() then reads its arguments from sys.argv.
if __name__ == "__main__":
    main()