blob: 5cc7816e0c383c53f1c6bf00ede26cb4da6e0124 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import imp
10import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
18try:
Tim Peterse1190062001-01-15 03:34:38 +000019 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000020 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000021except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000022 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000023 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024
Georg Brandl4d540882010-10-28 06:42:33 +000025__all__ = ["BadZipFile", "BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED",
26 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000027
class BadZipFile(Exception):
    """Error raised by this module for ZIP archives it cannot process."""


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that needs the ZIP64 extensions
    while those extensions are disabled.
    """


# Pre-3.2 compatibility names: both are plain aliases of BadZipFile.
BadZipfile = BadZipFile
error = BadZipFile
39
Guido van Rossum32abe6f2000-03-31 17:30:02 +000040
# Size limits of the classic (non-ZIP64) record fields.
# ZIP64_LIMIT: sizes above this value make ZipInfo.FileHeader() fall back to
# the ZIP64 extra field.
ZIP64_LIMIT = (1 << 31) - 1
# NOTE(review): presumably the entry count at which ZIP64 is required --
# confirm against the code that writes the central directory.
ZIP_FILECOUNT_LIMIT = 1 << 16
# Largest value that fits the 16-bit comment-size field of the
# "end of central directory" record.
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
49
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# Format "<4s4H2LH": little-endian, a 4-byte signature, four 16-bit fields,
# two 32-bit fields and one 16-bit field -- eight values in total, addressed
# through the _ECD_* indices below.
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience:
# _EndRecData() appends the comment bytes and the file offset of the record
# to the unpacked list.
_ECD_COMMENT = 8
_ECD_LOCATION = 9
74
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
# Format "<4s4B4HL2L5H2L": little-endian, a 4-byte signature followed by
# 18 integer fields (19 values in total), addressed through the _CD_*
# indices below.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
101
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# Format "<4s2B4HL2L2H": little-endian, a 4-byte signature followed by
# 11 integer fields (12 values in total), addressed through the _FH_*
# indices below.  ZipInfo.FileHeader() packs a header with this format.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
120
# The "Zip64 end of central directory locator" structure, magic number, and size
# Format "<4sLQL": signature plus three integers, unpacked by _EndRecData64()
# as (signature, disk number, relative offset, number of disks).
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# Format "<4sQ2H2L4Q": signature plus nine integers (ten values in total),
# addressed through the _CD64_* indices below.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
142
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    *fp* is an open binary file object.  An IOError raised while probing
    the file is treated as "not a ZIP file".
    """
    try:
        # A non-empty record list means the magic number was located.
        found = bool(_EndRecData(fp))
    except IOError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000150
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    Any IOError (for example a missing file) yields False.
    """
    is_zip = False
    try:
        if not hasattr(filename, "read"):
            # A path was given: open it ourselves and close it afterwards.
            with open(filename, "rb") as stream:
                is_zip = _check_zipfile(stream)
        else:
            # Already a file-like object; the caller keeps ownership of it.
            is_zip = _check_zipfile(fp=filename)
    except IOError:
        pass
    return is_zip
166
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin:   open binary file object positioned anywhere.
    offset: position of the classic end-of-central-directory record,
            relative to the end of the file (zero or negative).
    endrec: list produced by _EndRecData(); updated in place and returned.

    Returns endrec unchanged when no ZIP64 locator/record is present or the
    file is too short to hold one.  Raises BadZipFile for archives that
    span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator, so nothing to update.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated ZIP64 record: keep the values from the classic record.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
204
205
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    None is returned when no complete record can be located (not a ZIP
    file, or a truncated one)."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check keeps struct.unpack from failing on a short read.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record to follow it: the archive is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000260
Fred Drake484d7352000-10-02 21:14:52 +0000261
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename:  member name; truncated at the first null character and
                   normalized to use "/" as the directory separator.
        date_time: (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as bytes.

        The result is the packed local file header followed by the encoded
        file name and the extra field.  When a size exceeds ZIP64_LIMIT a
        ZIP64 extra record is appended and extract_version/create_version
        are raised to at least 45.
        """
        dt = self.date_time
        # MS-DOS packed date and time (seconds are stored halved).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            # Each version field is raised independently of the other.
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        ASCII is used when possible; otherwise the name is encoded as UTF-8
        and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 record (type 1) of the extra field, if any.

        Fields of this object holding the 0xffffffff placeholder are
        replaced, in order, by the 64-bit values stored in the record.
        Raises RuntimeError for a ZIP64 record of unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header; fewer
        # than 4 remaining bytes cannot be a record and are ignored.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000400
401
Thomas Wouterscf297e42007-02-23 15:07:44 +0000402class _ZipDecrypter:
403 """Class to handle decryption of files stored within a ZIP archive.
404
405 ZIP supports a password-based form of encryption. Even though known
406 plaintext attacks have been found against it, it is still useful
Christian Heimesfdab48e2008-01-20 09:06:41 +0000407 to be able to get data out of such a file.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000408
409 Usage:
410 zd = _ZipDecrypter(mypwd)
411 plain_char = zd(cypher_char)
412 plain_text = map(zd, cypher_text)
413 """
414
415 def _GenerateCRCTable():
416 """Generate a CRC-32 table.
417
418 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
419 internal keys. We noticed that a direct implementation is faster than
420 relying on binascii.crc32().
421 """
422 poly = 0xedb88320
423 table = [0] * 256
424 for i in range(256):
425 crc = i
426 for j in range(8):
427 if crc & 1:
428 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
429 else:
430 crc = ((crc >> 1) & 0x7FFFFFFF)
431 table[i] = crc
432 return table
433 crctable = _GenerateCRCTable()
434
435 def _crc32(self, ch, crc):
436 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000437 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000438
439 def __init__(self, pwd):
440 self.key0 = 305419896
441 self.key1 = 591751049
442 self.key2 = 878082192
443 for p in pwd:
444 self._UpdateKeys(p)
445
446 def _UpdateKeys(self, c):
447 self.key0 = self._crc32(c, self.key0)
448 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
449 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000450 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000451
452 def __call__(self, c):
453 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000454 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000455 k = self.key2 | 2
456 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000457 self._UpdateKeys(c)
458 return c
459
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # Parenthesized: "1 << 31 - 1" would evaluate to 1 << 30.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Wrap *fileobj*, positioned at the start of the member's data.

        fileobj:       file object the (possibly compressed) bytes are
                       read from.
        mode:          mode string; a 'U' in it enables universal newlines
                       in readline().
        zipinfo:       ZipInfo describing the member (compression type,
                       compressed size, name and, if set, CRC).
        decrypter:     optional callable applied to every raw byte.
        close_fileobj: if true, close() also closes *fileobj*.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        self._unconsumed = b''

        self._readbuffer = b''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                # read() rebuilt the buffer and dropped part of what it
                # returned; put the chunk back in front instead of letting
                # the offset go negative.
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: this object always supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        buf = b''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold *newdata* into the running CRC and verify it once at EOF.

        Raises BadZipFile when *eof* is true and the running CRC differs
        from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call.

        A negative n or None means "as much as possible" (MAX_N).
        """

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None must be tested first: comparing None with 0 raises TypeError.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = bytes(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data

    def close(self):
        """Close this object and, if requested at creation, the wrapped file."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000656
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000657
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000658class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000659 """ Class with methods to open, read, write, close, list zip files.
660
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000661 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000662
Fred Drake3d9091e2001-03-26 15:49:24 +0000663 file: Either the path to the file, or a file-like object.
664 If it is a path, the file will be opened and closed by ZipFile.
665 mode: The mode can be either read "r", write "w" or append "a".
666 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000667 allowZip64: if True ZipFile will create files with ZIP64 extensions when
668 needed, otherwise it will raise an exception when this would
669 be necessary.
670
Fred Drake3d9091e2001-03-26 15:49:24 +0000671 """
Fred Drake484d7352000-10-02 21:14:52 +0000672
Fred Drake90eac282001-02-28 05:29:34 +0000673 fp = None # Set here since __del__ checks it
674
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file: path of the archive (a str) or an already open file-like
        object.  A path is opened here and is owned by this object; a
        file-like object is used as given.
    mode: "r", "w" or "a".  Appending to a path that cannot be opened
        for update falls back to creating it.
    compression: ZIP_STORED or ZIP_DEFLATED (the latter needs zlib).
    allowZip64: stored as-is for the size checks done when writing.

    Raises RuntimeError for an invalid mode or an unsupported or
    unavailable compression method, and BadZipFile when an archive
    opened for reading cannot be parsed.  If reading the existing
    directory fails for any reason, a file opened here is closed again
    before the exception propagates.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    else:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    # 'key' selects the set-up branch below; it can differ from
    # self.mode when appending falls back to creating the file.
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self.comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        modeDict = {'r': 'rb', 'w': 'wb', 'a': 'r+b'}
        try:
            self.fp = io.open(file, modeDict[mode])
        except IOError:
            if mode != 'a':
                raise
            # Could not open for update: create the file instead.
            mode = key = 'w'
            self.fp = io.open(file, modeDict[mode])
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    try:
        if key == 'r':
            self._GetContents()
        elif key == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        else:
            # key == 'a': the mode was validated above, so no other
            # value can reach this point.
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
    except BaseException:
        # Do not leave a half-initialised object holding an open file:
        # close what we opened ourselves, never a caller-supplied file.
        fp, self.fp = self.fp, None
        if fp is not None and not self._filePassed:
            fp.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000742
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is the target."""
    return self
745
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    The exception details are ignored and nothing is returned, so an
    exception raised inside the block keeps propagating.
    """
    self.close()
748
def _GetContents(self):
    """Load the archive directory via _RealGetContents().

    When the data turns out not to be a valid archive (BadZipFile), a
    file that was opened by this object is closed and ``fp`` is reset
    to None before the error is re-raised.  A file object supplied by
    the caller is left untouched.
    """
    try:
        self._RealGetContents()
    except BadZipFile:
        if self._filePassed:
            # Caller owns the file object: just report the problem.
            raise
        self.fp.close()
        self.fp = None
        raise
759
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record, stores the archive
    comment and the position of the central directory
    (``self.start_dir``), then parses every central directory entry
    into a ZipInfo that is appended to ``self.filelist`` and registered
    in ``self.NameToInfo``.

    Raises BadZipFile when no end record can be found, when an entry
    does not start with the central directory signature, or when the
    central directory ends in the middle of an entry.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except IOError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory rather than from the file.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # A short read would otherwise surface as struct.error
            # instead of the documented BadZipFile.
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]      # same field that is stored as flag_bits below
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        x._decodeExtra()
        # Shift by the amount of data that precedes the archive.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000831
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000832
def namelist(self):
    """Return the member names of the archive as a new list.

    Names appear in the same order as the entries of ``self.filelist``.
    """
    return [entry.filename for entry in self.filelist]
839
def infolist(self):
    """Return a list of class ZipInfo instances for files in the
    archive.

    The list returned is ``self.filelist`` itself, not a copy.
    """
    return self.filelist
844
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header line is written, followed by one line per entry of
    ``self.filelist`` showing its name, modification time and size.
    ``file`` is handed to print() unchanged.
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000853
def testzip(self):
    """Read all the files and check the CRC.

    Every member is read to the end so that its CRC-32 is checked.
    Returns the name of the first member for which reading raises
    BadZipFile, or None when all members read cleanly.
    """
    chunk_size = 2 ** 20
    for zinfo in self.filelist:
        try:
            # Read by chunks, to avoid an OverflowError or a
            # MemoryError with very large embedded files.
            # The with-statement closes the member stream (and any file
            # opened for it) even when the check fails.
            with self.open(zinfo.filename, "r") as f:
                while f.read(chunk_size):     # Check CRC-32
                    pass
        except BadZipFile:
            return zinfo.filename
    return None
866
def getinfo(self, name):
    """Return the ZipInfo registered under 'name'.

    Raises KeyError when the archive has no member with that name.
    """
    entry = self.NameToInfo.get(name)
    if entry is not None:
        return entry
    raise KeyError('There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000875
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A non-empty password must be a bytes object, otherwise TypeError is
    raised.  Any false value (None, b'') clears the default password.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    # Normalise every "no password" value to None.
    self.pwd = pwd or None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000884
def read(self, name, pwd=None):
    """Return the complete contents of member 'name'.

    The member is opened in mode "r" with the given password, read to
    the end and closed again before the data is returned.
    """
    with self.open(name, "r", pwd) as member:
        payload = member.read()
    return payload
Guido van Rossumd8faa362007-04-27 19:54:29 +0000889
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name: a member name or a ZipInfo instance.
    mode: "r", "U" or "rU"; passed on to the returned ZipExtFile.
    pwd: password (bytes) for an encrypted member; the archive default
        set with setpassword() is used when it is omitted.

    Raises RuntimeError for an invalid mode, a closed archive, a
    missing password or a wrong password; TypeError for a non-bytes
    password; KeyError for an unknown member name; BadZipFile when the
    local file header is damaged or disagrees with the directory.

    When the archive was opened from a path, a separate file handle is
    opened for the member.  That handle is closed again if anything
    goes wrong before the ZipExtFile takes ownership of it.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")
        if len(fheader) != sizeFileHeader:
            # Report a short header as a format error rather than
            # letting struct.unpack raise struct.error.
            raise BadZipFile("Truncated file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = list(map(zd, header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file time from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd,
                          close_fileobj=not self._filePassed)
    except BaseException:
        # Single clean-up point for every failure above: release the
        # handle we opened, never the caller-supplied file object.
        if not self._filePassed:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000977
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the path it was written to.

    `member' may be a member name or a ZipInfo object.  The member is
    written below `path', or below the current working directory when
    `path' is None.  `pwd' is the password used for the member.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
991
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() on each of them.

    `members' defaults to everything returned by namelist(); `path'
    and `pwd' are handed to extract() unchanged.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
1003
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

    Returns the normalised path that was created.  A member whose name
    ends in '/' is created as a directory; anything else is copied
    from the archive into a file, creating missing parent directories.

    Member names come from the archive and are therefore untrusted: a
    drive prefix, leading separators and '.' / '..' components are
    dropped so that the result always stays below 'targetpath'.
    """
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # Build the destination pathname, replacing forward slashes by the
    # platform specific separator.
    arcname = member.filename.replace('/', os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # Discard anything that could redirect the write elsewhere: a
    # drive, empty components (absolute names) and '.' / '..'.
    arcname = os.path.splitdrive(arcname)[1]
    unsafe = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(part for part in arcname.split(os.path.sep)
                               if part not in unsafe)

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    if member.filename.endswith('/'):
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    # Both files are closed even when copying fails part-way.
    with self.open(member, pwd=pwd) as source, \
            open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1040
def _writecheck(self, zinfo):
    """Validate 'zinfo' and the archive state before a member is written.

    Raises RuntimeError when the archive is not writable (wrong mode or
    already closed) or the compression method cannot be used, and
    LargeZipFile when the member size or its header offset exceeds
    ZIP64_LIMIT while ZIP64 extensions are not allowed.  A duplicate
    member name is only reported, and only when debugging is enabled.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("Duplicate name:", zinfo.filename)

    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")

    if zinfo.compress_type == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError("That compression method is not supported")

    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile(
            "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001063
def write(self, filename, arcname=None, compress_type=None):
    """Add the file or directory 'filename' to the archive.

    arcname: name to store the member under; defaults to 'filename'.
        Any drive and leading separators are removed, and a directory
        gets a trailing '/'.
    compress_type: overrides the archive's default compression method
        for this member.

    Raises RuntimeError when the archive has already been closed, plus
    whatever _writecheck() raises for the new member.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    date_time = time.localtime(st.st_mtime)[0:6]

    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st.st_mode & 0xFFFF) << 16      # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # A directory is a header-only member without any data.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    with open(filename, "rb") as src:
        # The header is written with zeroed CRC and sizes; the real
        # values are patched in once the data has been written.
        zinfo.CRC = checksum = 0
        zinfo.compress_size = packed_size = 0
        zinfo.file_size = raw_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None
        # Copy in 8 KiB chunks until the source is exhausted.
        for chunk in iter(lambda: src.read(1024 * 8), b""):
            raw_size += len(chunk)
            checksum = crc32(chunk, checksum) & 0xffffffff
            if cmpr:
                chunk = cmpr.compress(chunk)
                packed_size += len(chunk)
            self.fp.write(chunk)

    if cmpr:
        tail = cmpr.flush()
        packed_size += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed_size
    else:
        zinfo.compress_size = raw_size
    zinfo.CRC = checksum
    zinfo.file_size = raw_size

    # Seek backwards and write CRC and file sizes
    resume_at = self.fp.tell()      # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(resume_at, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1144
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    compress_type, when given, overrides the compression method of the
    member.  For a plain name a ZipInfo is created with the current
    local time and the archive's default compression.

    Raises RuntimeError when the archive has already been closed, plus
    whatever _writecheck() raises for the new member.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)            # Uncompressed size
    # Nothing is written before the header below, so this position is
    # still the start of the header when it is written.
    zinfo.header_offset = self.fp.tell()    # Start of header data
    if compress_type is not None:
        zinfo.compress_type = compress_type

    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff       # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
        data = co.compress(data) + co.flush()
        zinfo.compress_size = len(data)    # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    # Flush last so the trailing descriptor is pushed out as well.
    self.fp.flush()
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1190
def __del__(self):
    """Call the "close()" method in case the user forgot."""
    # close() returns immediately when self.fp is None, so this is a
    # no-op for an archive that has already been closed.
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001194
1195 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001196 """Close the file, and for mode "w" and "a" write the ending
1197 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001198 if self.fp is None:
1199 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001200
1201 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001202 count = 0
1203 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001204 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001205 count = count + 1
1206 dt = zinfo.date_time
1207 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001208 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001209 extra = []
1210 if zinfo.file_size > ZIP64_LIMIT \
1211 or zinfo.compress_size > ZIP64_LIMIT:
1212 extra.append(zinfo.file_size)
1213 extra.append(zinfo.compress_size)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001214 file_size = 0xffffffff
1215 compress_size = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001216 else:
1217 file_size = zinfo.file_size
1218 compress_size = zinfo.compress_size
1219
1220 if zinfo.header_offset > ZIP64_LIMIT:
1221 extra.append(zinfo.header_offset)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001222 header_offset = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001223 else:
1224 header_offset = zinfo.header_offset
1225
1226 extra_data = zinfo.extra
1227 if extra:
1228 # Append a ZIP64 field to the extra's
1229 extra_data = struct.pack(
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001230 '<HH' + 'Q'*len(extra),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001231 1, 8*len(extra), *extra) + extra_data
1232
1233 extract_version = max(45, zinfo.extract_version)
1234 create_version = max(45, zinfo.create_version)
1235 else:
1236 extract_version = zinfo.extract_version
1237 create_version = zinfo.create_version
1238
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001239 try:
1240 filename, flag_bits = zinfo._encodeFilenameFlags()
1241 centdir = struct.pack(structCentralDir,
Ezio Melottiaf30d2e2009-09-25 21:35:24 +00001242 stringCentralDir, create_version,
1243 zinfo.create_system, extract_version, zinfo.reserved,
1244 flag_bits, zinfo.compress_type, dostime, dosdate,
1245 zinfo.CRC, compress_size, file_size,
1246 len(filename), len(extra_data), len(zinfo.comment),
1247 0, zinfo.internal_attr, zinfo.external_attr,
1248 header_offset)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001249 except DeprecationWarning:
Ezio Melottiaf30d2e2009-09-25 21:35:24 +00001250 print((structCentralDir, stringCentralDir, create_version,
1251 zinfo.create_system, extract_version, zinfo.reserved,
1252 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1253 zinfo.CRC, compress_size, file_size,
1254 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1255 0, zinfo.internal_attr, zinfo.external_attr,
1256 header_offset), file=sys.stderr)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001257 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001258 self.fp.write(centdir)
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001259 self.fp.write(filename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001260 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001261 self.fp.write(zinfo.comment)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001262
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001263 pos2 = self.fp.tell()
1264 # Write end-of-zip-archive record
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001265 centDirCount = count
1266 centDirSize = pos2 - pos1
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001267 centDirOffset = pos1
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001268 if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1269 centDirOffset > ZIP64_LIMIT or
1270 centDirSize > ZIP64_LIMIT):
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001271 # Need to write the ZIP64 end-of-archive records
1272 zip64endrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001273 structEndArchive64, stringEndArchive64,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001274 44, 45, 45, 0, 0, centDirCount, centDirCount,
1275 centDirSize, centDirOffset)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001276 self.fp.write(zip64endrec)
1277
1278 zip64locrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001279 structEndArchive64Locator,
1280 stringEndArchive64Locator, 0, pos2, 1)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001281 self.fp.write(zip64locrec)
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001282 centDirCount = min(centDirCount, 0xFFFF)
1283 centDirSize = min(centDirSize, 0xFFFFFFFF)
1284 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001285
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001286 # check for valid comment length
1287 if len(self.comment) >= ZIP_MAX_COMMENT:
1288 if self.debug > 0:
1289 msg = 'Archive comment is too long; truncating to %d bytes' \
1290 % ZIP_MAX_COMMENT
1291 self.comment = self.comment[:ZIP_MAX_COMMENT]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001292
Georg Brandl2ee470f2008-07-16 12:55:28 +00001293 endrec = struct.pack(structEndArchive, stringEndArchive,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001294 0, 0, centDirCount, centDirCount,
1295 centDirSize, centDirOffset, len(self.comment))
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001296 self.fp.write(endrec)
1297 self.fp.write(self.comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001298 self.fp.flush()
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001299
Fred Drake3d9091e2001-03-26 15:49:24 +00001300 if not self._filePassed:
1301 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001302 self.fp = None
1303
1304
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=False, optimize=-1):
        """Open the archive and remember the requested optimization level.

        file, mode, compression and allowZip64 are passed unchanged to
        ZipFile.__init__().  optimize selects which byte-code file
        _get_codename() puts into the archive: -1 (the default) uses
        whatever up-to-date compiled file is found, 0 selects the .pyc
        file and any other value selects the .pyo file.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc,
        unless compilation fails, in which case the module.py source
        itself is added.  This method will compile the module.py into
        module.pyc if necessary.

        basename, when non-empty, is the archive directory under which
        the entries are stored.  RuntimeError is raised when pathname is
        neither a directory nor a name ending in ".py".
        """
        name = os.path.basename(pathname)
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                # __init__.py has just been written; do not add it twice.
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).
        If compilation fails, the error message is printed and the .py
        source file is returned instead.
        """
        def _compile(file, optimize=-1):
            # Byte-compile `file`; report failure instead of raising so the
            # caller can fall back to archiving the source.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = imp.cache_from_source(file_py, True)
        pycache_pyo = imp.cache_from_source(file_py, False)

        def _up_to_date(candidate):
            # A compiled file is usable when it exists and is at least as
            # new as the source it was built from.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present
            if _up_to_date(file_pyo):
                # Use .pyo file.
                arcname = fname = file_pyo
            elif _up_to_date(file_pyc):
                # Use .pyc file.
                arcname = fname = file_pyc
            elif _up_to_date(pycache_pyc):
                # Use the __pycache__/*.pyc file, but write it to the legacy
                # pyc file name in the archive.
                fname = pycache_pyc
                arcname = file_pyc
            elif _up_to_date(pycache_pyo):
                # Use the __pycache__/*.pyo file, but write it to the legacy
                # pyo file name in the archive.
                fname = pycache_pyo
                arcname = file_pyo
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    fname = (pycache_pyc if __debug__ else pycache_pyo)
                    arcname = (file_pyc if __debug__ else file_pyo)
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_pyc
                arcname = file_pyc
            else:
                fname = pycache_pyo
                arcname = file_pyo
            if not _up_to_date(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001447
1448
def main(args = None):
    """Run the command-line interface of this module.

    args is the list of command-line arguments (without the program
    name); when None, sys.argv[1:] is used.  The first argument selects
    the action:

        -l zipfile          list the archive contents
        -t zipfile          test the archive and report a corrupted member
        -e zipfile target   extract every member below the target directory
        -c zipfile src ...  create an archive from files and directories

    On a missing or unknown option, or a wrong number of arguments, the
    usage text is printed and the process exits with status 1.  The
    archive is always closed, even when an operation raises.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            # The archive was previously left open in this branch.
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            # NOTE(review): member names are joined to the target as-is;
            # names containing ".." or absolute paths are not sanitized.
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # Directory entry: create it instead of trying to
                    # open it as a file.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # An empty dirname means the current directory, which
                # os.makedirs() cannot create.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively every file below a directory;
            # anything else is silently ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1522
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()