blob: 2da70b5e60d3e0941ea26e6764189fc4650d9ade [file] [log] [blame]
"""
Read and write ZIP files.

XXX references to utf-8 need further investigation.
"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import imp
10import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
# zlib is optional: without it only stored (uncompressed) members can be
# handled, and the CRC-32 routine falls back to the one in binascii.
try:
    import zlib  # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024
# Names exported by "from zipfile import *".
__all__ = ["BadZipFile", "BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000027
class BadZipFile(Exception):
    """
    Raised when an archive is malformed or unsupported, e.g. a member fails
    its CRC-32 check or the archive spans multiple disks.
    """


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """

error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names
39
Guido van Rossum32abe6f2000-03-31 17:30:02 +000040
# Sizes above ZIP64_LIMIT are written using the ZIP64 extra field.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
49
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9
73_ECD_LOCATION = 9
74
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
100_CD_LOCAL_HEADER_OFFSET = 18
101
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
119_FH_EXTRA_FIELD_LENGTH = 11
120
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000125
# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
141_CD64_OFFSET_START_CENTDIR = 9
142
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    *fp* is a seekable binary file object.  An IOError raised while
    probing the file is treated as "not a ZIP file".
    """
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except IOError:
        pass
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000150
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    Returns False (rather than raising) when the file cannot be opened
    or read.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            # Already a file-like object: probe it directly and leave it open.
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except IOError:
        pass
    return result
166
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is a seekable binary file object, offset is the (negative) position
    of the classic end-of-central-directory record relative to the end of
    the file, and endrec is the list built from that record.  endrec is
    updated in place and returned; it is returned unchanged when no ZIP64
    records are found.  Raises BadZipFile for multi-disk archives.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator, so no ZIP64 data.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated ZIP64 record; unpacking it would raise struct.error.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
204
205
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.
    None is returned when no complete record can be found."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check matters for file-like objects that clamp a negative
    # seek instead of raising: a short tail must not reach struct.unpack.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The record is cut off by the end of the file: corrupt archive.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000260
Fred Drake484d7352000-10-02 21:14:52 +0000261
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for *filename* with modification time *date_time*.

        date_time is a (year, month, day, hour, min, sec) sequence.
        Raises ValueError if the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file (local) header as bytes.

        The result is the packed header followed by the encoded file name
        and the extra field.  When a size exceeds ZIP64_LIMIT a ZIP64 extra
        record is appended and the version fields are raised to at least 45.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value; basing it on
            # extract_version could lower a higher create_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits).

        ASCII is used when possible; otherwise the name is encoded as UTF-8
        and flag bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply ZIP64 values found in the extra field to this entry.

        Each extra record is a 4-byte (type, length) header followed by
        *length* bytes of payload.  Only type 1 (ZIP64) is interpreted; it
        supplies file_size, compress_size and header_offset, in that order,
        for whichever of them hold the 0xffffffff placeholder.  Raises
        RuntimeError for a ZIP64 record of unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Stop once fewer than 4 bytes remain: that cannot be a record
        # header, and unpacking it would raise struct.error.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000404
405
Thomas Wouterscf297e42007-02-23 15:07:44 +0000406class _ZipDecrypter:
407 """Class to handle decryption of files stored within a ZIP archive.
408
409 ZIP supports a password-based form of encryption. Even though known
410 plaintext attacks have been found against it, it is still useful
Christian Heimesfdab48e2008-01-20 09:06:41 +0000411 to be able to get data out of such a file.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000412
413 Usage:
414 zd = _ZipDecrypter(mypwd)
415 plain_char = zd(cypher_char)
416 plain_text = map(zd, cypher_text)
417 """
418
419 def _GenerateCRCTable():
420 """Generate a CRC-32 table.
421
422 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
423 internal keys. We noticed that a direct implementation is faster than
424 relying on binascii.crc32().
425 """
426 poly = 0xedb88320
427 table = [0] * 256
428 for i in range(256):
429 crc = i
430 for j in range(8):
431 if crc & 1:
432 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
433 else:
434 crc = ((crc >> 1) & 0x7FFFFFFF)
435 table[i] = crc
436 return table
437 crctable = _GenerateCRCTable()
438
439 def _crc32(self, ch, crc):
440 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000441 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000442
443 def __init__(self, pwd):
444 self.key0 = 305419896
445 self.key1 = 591751049
446 self.key2 = 878082192
447 for p in pwd:
448 self._UpdateKeys(p)
449
450 def _UpdateKeys(self, c):
451 self.key0 = self._crc32(c, self.key0)
452 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
453 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000454 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000455
456 def __call__(self, c):
457 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000458 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000459 k = self.key2 | 2
460 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000461 self._UpdateKeys(c)
462 return c
463
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  Parenthesised on purpose:
    # "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Wrap *fileobj*, positioned at the start of the member's data.

        mode is stored as given; a 'U' in it enables universal newlines
        in readline().  zipinfo supplies the compression type, compressed
        size, file name and (if present) the expected CRC.  decrypter, when
        given, is called on each byte read.  close_fileobj tells close() to
        also close fileobj.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        self._unconsumed = b''

        self._readbuffer = b''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            # Fill the buffer via read(), then rewind over what was read.
            chunk = self.read(n)
            self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: the stream supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        buf = b''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold *newdata* into the running CRC; verify it when *eof* is true.

        Raises BadZipFile if the final CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None must be tested first: "None < 0" raises TypeError.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = bytes(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data

    def close(self):
        """Close the stream, and the underlying file if it is owned."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000660
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000661
class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.

    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Raises RuntimeError for an unknown mode or compression method, or
        when ZIP_DEFLATED is requested but zlib is unavailable.  If reading
        the table of contents fails, a file opened here (i.e. not passed in
        by the caller) is closed before the exception propagates.
        """
        if mode not in ("r", "w", "a"):
            raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                    "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0                  # Level of printing: 0 through 3
        self.NameToInfo = {}            # Find file info given name
        self.filelist = []              # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key = mode.replace('b', '')[0]
        self.pwd = None
        self._comment = b''

        # Check if we were passed a file-like object
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            try:
                self.fp = io.open(file, modeDict[mode])
            except IOError:
                if mode == 'a':
                    # Appending to a file that cannot be opened for update:
                    # fall back to creating it.
                    mode = key = 'w'
                    self.fp = io.open(file, modeDict[mode])
                else:
                    raise
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        try:
            if key == 'r':
                self._RealGetContents()
            elif key == 'w':
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
            elif key == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir, 0)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
            else:
                raise RuntimeError('Mode must be "r", "w" or "a"')
        except:
            # Deliberately broad: this is cleanup only, the exception is
            # always re-raised.
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()
            raise

    def __enter__(self):
        """Return the archive itself for use in a ``with`` statement."""
        return self

    def __exit__(self, type, value, traceback):
        """Close the archive when leaving a ``with`` block."""
        self.close()

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Populates self.filelist, self.NameToInfo, self.start_dir and the
        archive comment.  Raises BadZipFile when no end-of-archive record
        is found or when a central directory entry is malformed or
        truncated.
        """
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except IOError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset", offset_cd, inferred, concat)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        # Parse the directory from memory rather than from the archive file.
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if centdir[0:4] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            if len(centdir) != sizeCentralDir:
                # A short record would otherwise surface as struct.error.
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            flags = centdir[5]
            if flags & 0x800:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
                x.flag_bits, x.compress_type, t, d,
                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            if self.debug > 2:
                print("total", total)

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self, file=None):
        """Print a table of contents for the zip file.

        Output goes to 'file' (passed straight to print()).
        """
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
              file=file)
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
                  file=file)

    def testzip(self):
        """Read all the files and check the CRC.

        Return the name of the first member whose read raises BadZipFile,
        or None when every member reads cleanly.
        """
        chunk_size = 2 ** 20
        for zinfo in self.filelist:
            try:
                # Read by chunks, to avoid an OverflowError or a
                # MemoryError with very large embedded files.
                with self.open(zinfo.filename, "r") as f:
                    while f.read(chunk_size):     # Check CRC-32
                        pass
            except BadZipFile:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError if the archive has no member called 'name'.
        """
        info = self.NameToInfo.get(name)
        if info is None:
            raise KeyError(
                'There is no item named %r in the archive' % name)

        return info

    def setpassword(self, pwd):
        """Set default password for encrypted files.

        'pwd' must be bytes; any false value clears the default password.
        """
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd))
        self.pwd = pwd if pwd else None

    @property
    def comment(self):
        """The comment text associated with the ZIP file."""
        return self._comment

    @comment.setter
    def comment(self, comment):
        if not isinstance(comment, bytes):
            raise TypeError("comment: expected bytes, got %s" % type(comment))
        # check for valid comment length; a comment of exactly
        # ZIP_MAX_COMMENT bytes fits and is not truncated.
        if len(comment) > ZIP_MAX_COMMENT:
            if self.debug:
                print('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT)
            comment = comment[:ZIP_MAX_COMMENT]
        self._comment = comment
        self._didModify = True

    def read(self, name, pwd=None):
        """Return file bytes (as a string) for name."""
        with self.open(name, "r", pwd) as fp:
            return fp.read()

    def open(self, name, mode="r", pwd=None):
        """Return file-like object for 'name'.

        'name' may be a member name or a ZipInfo instance.  'mode' must be
        "r", "U" or "rU".  'pwd' is the bytes password for an encrypted
        member; when omitted the archive default password is used.

        Raises RuntimeError for a bad mode, a closed archive, a missing
        password or a wrong password; TypeError for a non-bytes password;
        BadZipFile when the local file header does not match the directory.
        """
        if mode not in ("r", "U", "rU"):
            raise RuntimeError('open() requires mode "r", "U", or "rU"')
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd))
        if not self.fp:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")

        # Only open a new file for instances where we were not
        # given a file object in the constructor
        if self._filePassed:
            zef_file = self.fp
        else:
            zef_file = io.open(self.filename, 'rb')

        try:
            # Make sure we have an info object
            if isinstance(name, ZipInfo):
                # 'name' is already an info object
                zinfo = name
            else:
                # Get info object for name
                zinfo = self.getinfo(name)
            zef_file.seek(zinfo.header_offset, 0)

            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if fheader[0:4] != stringFileHeader:
                raise BadZipFile("Bad magic number for file header")

            fheader = struct.unpack(structFileHeader, fheader)
            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if zinfo.flag_bits & 0x800:
                # UTF-8 filename
                fname_str = fname.decode("utf-8")
            else:
                fname_str = fname.decode("cp437")

            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            zd = None
            if is_encrypted:
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %s is encrypted, password "
                                       "required for extraction" % name)

                zd = _ZipDecrypter(pwd)
                # The first 12 bytes in the cypher stream is an encryption header
                #  used to strengthen the algorithm. The first 11 bytes are
                #  completely random, while the 12th contains the MSB of the CRC,
                #  or the MSB of the file time depending on the header type
                #  and is used to check the correctness of the password.
                header = zef_file.read(12)
                h = list(map(zd, header[0:12]))
                if zinfo.flag_bits & 0x8:
                    # compare against the file type from extended local headers
                    check_byte = (zinfo._raw_time >> 8) & 0xff
                else:
                    # compare against the CRC otherwise
                    check_byte = (zinfo.CRC >> 24) & 0xff
                if h[11] != check_byte:
                    raise RuntimeError("Bad password for file", name)

            return ZipExtFile(zef_file, mode, zinfo, zd,
                              close_fileobj=not self._filePassed)
        except:
            # Cleanup only (always re-raised): do not leak the file handle
            # we opened ourselves.
            if not self._filePassed:
                zef_file.close()
            raise

    def extract(self, member, path=None, pwd=None):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a ZipInfo object. You can
           specify a different directory using `path'.

           Returns the path of the file or directory that was created.
        """
        if not isinstance(member, ZipInfo):
            member = self.getinfo(member)

        if path is None:
            path = os.getcwd()

        return self._extract_member(member, path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all members from the archive to the current working
           directory. `path' specifies a different directory to extract to.
           `members' is optional and must be a subset of the list returned
           by namelist().
        """
        if members is None:
            members = self.namelist()

        for zipinfo in members:
            self.extract(zipinfo, path, pwd)

    def _extract_member(self, member, targetpath, pwd):
        """Extract the ZipInfo object 'member' to a physical
           file on the path targetpath.

           The member name comes from the archive and is untrusted: it is
           treated as relative to 'targetpath', and any drive prefix, empty,
           "." or ".." components are dropped so that extraction cannot
           escape the target directory.

           Returns the normalized path that was created.
        """
        # Strip trailing path separator, unless it represents the root.
        if (targetpath[-1:] in (os.path.sep, os.path.altsep)
                and len(os.path.splitdrive(targetpath)[1]) > 1):
            targetpath = targetpath[:-1]

        # build the destination pathname, replacing
        # forward slashes to platform specific separators.
        arcname = member.filename.replace('/', os.path.sep)
        if os.path.altsep:
            arcname = arcname.replace(os.path.altsep, os.path.sep)
        # interpret absolute pathname as relative, remove drive letter,
        # redundant separators, "." and ".." components.
        arcname = os.path.splitdrive(arcname)[1]
        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
        arcname = os.path.sep.join(part
                                   for part in arcname.split(os.path.sep)
                                   if part not in invalid_path_parts)

        targetpath = os.path.join(targetpath, arcname)
        targetpath = os.path.normpath(targetpath)

        # Create all upper directories if necessary.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            os.makedirs(upperdirs)

        if member.filename[-1] == '/':
            # Directory entry: nothing to copy, just make sure it exists.
            if not os.path.isdir(targetpath):
                os.mkdir(targetpath)
            return targetpath

        with self.open(member, pwd=pwd) as source, \
             open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)

        return targetpath

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        Raises RuntimeError for a wrong mode, closed archive or unsupported
        compression, and LargeZipFile when ZIP64 would be needed but is
        not allowed.
        """
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name:", zinfo.filename)
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError("That compression method is not supported")
        if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
            raise LargeZipFile("Filesize would require ZIP64 extensions")
        if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
            raise LargeZipFile(
                "Zipfile size would require ZIP64 extensions")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        'arcname' defaults to 'filename'; its drive and leading separators
        are removed.  'compress_type' overrides the archive default
        compression for this entry.  A directory is stored as an empty entry
        whose name ends in '/'.
        """
        if not self.fp:
            raise RuntimeError(
                "Attempt to write to ZIP archive that was already closed")

        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        zinfo.flag_bits = 0x00
        zinfo.header_offset = self.fp.tell()    # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            self.fp.write(zinfo.FileHeader())
            return

        with open(filename, "rb") as fp:
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            zinfo.file_size = file_size = 0
            self.fp.write(zinfo.FileHeader())
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                        zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = crc32(buf, CRC) & 0xffffffff
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Seek backwards and write CRC and file sizes
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset + 14, 0)
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo_or_arcname, data, compress_type=None):
        """Write a file into the archive.  The contents is 'data', which
        may be either a 'str' or a 'bytes' instance; if it is a 'str',
        it is encoded as UTF-8 first.
        'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive.
        'compress_type', when given, overrides the entry's compression."""
        if isinstance(data, str):
            data = data.encode("utf-8")
        if not isinstance(zinfo_or_arcname, ZipInfo):
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])
            zinfo.compress_type = self.compression
            zinfo.external_attr = 0o600 << 16
        else:
            zinfo = zinfo_or_arcname

        if not self.fp:
            raise RuntimeError(
                "Attempt to write to ZIP archive that was already closed")

        zinfo.file_size = len(data)             # Uncompressed size
        zinfo.header_offset = self.fp.tell()    # Start of header data
        if compress_type is not None:
            zinfo.compress_type = compress_type

        self._writecheck(zinfo)
        self._didModify = True
        zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                  zlib.DEFLATED, -15)
            data = co.compress(data) + co.flush()
            zinfo.compress_size = len(data)     # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        # header_offset was captured above; nothing has been written since.
        self.fp.write(zinfo.FileHeader())
        self.fp.write(data)
        self.fp.flush()
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                      zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Calling close() on an already closed archive does nothing.  The
        underlying file is closed only if ZipFile opened it itself, and this
        happens even when writing the ending records fails.
        """
        if self.fp is None:
            return

        try:
            if self.mode in ("w", "a") and self._didModify: # write ending records
                pos1 = self.fp.tell()
                for zinfo in self.filelist:         # write central directory
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    # Values too large for the fixed-width fields go into a
                    # ZIP64 extra field; the fixed field is set to all ones.
                    extra = []
                    if zinfo.file_size > ZIP64_LIMIT \
                       or zinfo.compress_size > ZIP64_LIMIT:
                        extra.append(zinfo.file_size)
                        extra.append(zinfo.compress_size)
                        file_size = 0xffffffff
                        compress_size = 0xffffffff
                    else:
                        file_size = zinfo.file_size
                        compress_size = zinfo.compress_size

                    if zinfo.header_offset > ZIP64_LIMIT:
                        extra.append(zinfo.header_offset)
                        header_offset = 0xffffffff
                    else:
                        header_offset = zinfo.header_offset

                    extra_data = zinfo.extra
                    if extra:
                        # Append a ZIP64 field to the extra's
                        extra_data = struct.pack(
                            '<HH' + 'Q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data

                        extract_version = max(45, zinfo.extract_version)
                        create_version = max(45, zinfo.create_version)
                    else:
                        extract_version = zinfo.extract_version
                        create_version = zinfo.create_version

                    try:
                        filename, flag_bits = zinfo._encodeFilenameFlags()
                        centdir = struct.pack(structCentralDir,
                            stringCentralDir, create_version,
                            zinfo.create_system, extract_version, zinfo.reserved,
                            flag_bits, zinfo.compress_type, dostime, dosdate,
                            zinfo.CRC, compress_size, file_size,
                            len(filename), len(extra_data), len(zinfo.comment),
                            0, zinfo.internal_attr, zinfo.external_attr,
                            header_offset)
                    except DeprecationWarning:
                        # Dump the offending values to stderr, then re-raise.
                        print((structCentralDir, stringCentralDir, create_version,
                            zinfo.create_system, extract_version, zinfo.reserved,
                            zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                            zinfo.CRC, compress_size, file_size,
                            len(zinfo.filename), len(extra_data), len(zinfo.comment),
                            0, zinfo.internal_attr, zinfo.external_attr,
                            header_offset), file=sys.stderr)
                        raise
                    self.fp.write(centdir)
                    self.fp.write(filename)
                    self.fp.write(extra_data)
                    self.fp.write(zinfo.comment)

                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                centDirCount = len(self.filelist)
                centDirSize = pos2 - pos1
                centDirOffset = pos1
                if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                        centDirOffset > ZIP64_LIMIT or
                        centDirSize > ZIP64_LIMIT):
                    # Need to write the ZIP64 end-of-archive records
                    zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, centDirCount, centDirCount,
                        centDirSize, centDirOffset)
                    self.fp.write(zip64endrec)

                    zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                    self.fp.write(zip64locrec)
                    # Clamp to what the classic end record can represent.
                    centDirCount = min(centDirCount, 0xFFFF)
                    centDirSize = min(centDirSize, 0xFFFFFFFF)
                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)

                endrec = struct.pack(structEndArchive, stringEndArchive,
                                     0, 0, centDirCount, centDirCount,
                                     centDirSize, centDirOffset,
                                     len(self._comment))
                self.fp.write(endrec)
                self.fp.write(self._comment)
                self.fp.flush()
        finally:
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()
1306
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=False, optimize=-1):
        """Open the archive and remember the requested optimization level.

        file, mode, compression and allowZip64 are handed to ZipFile
        unchanged.  optimize controls which compiled file writepy() stores:
        -1 (the default) picks whatever up-to-date bytecode file already
        exists, 0 selects the .pyc file and any other value selects the
        .pyo file, compiling with that level when the file is missing or
        older than the source.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are module.pyo or module.pyc; the
        module.py is compiled if necessary, and the plain module.py
        source is stored instead when compilation fails.

        basename is the archive path prefix under which the entries are
        stored.  Raises RuntimeError if pathname is a file that does not
        end with ".py".
        """
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                # __init__.py has just been written; do not add it twice.
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path (without the ".py" suffix), return the
        path of the file to store and the name to store it under,
        compiling if necessary.  For example, given /python/lib/string
        with an up-to-date string.pyc next to it, return
        (/python/lib/string.pyc, string.pyc).  The archive name is
        prefixed with "basename/" when basename is non-empty.  If the
        module cannot be compiled, the .py source itself is returned.
        """
        def _compile(file, optimize=-1):
            # Compile `file`; report failure instead of propagating it so
            # the caller can fall back to archiving the source.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = imp.cache_from_source(file_py, True)
        pycache_pyo = imp.cache_from_source(file_py, False)

        def _fresh(candidate):
            # True if `candidate` exists and is not older than the source.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present
            if _fresh(file_pyo):
                # Use .pyo file.
                arcname = fname = file_pyo
            elif _fresh(file_pyc):
                # Use .pyc file.
                arcname = fname = file_pyc
            elif _fresh(pycache_pyc):
                # Use the __pycache__/*.pyc file, but write it to the legacy
                # pyc file name in the archive.
                fname = pycache_pyc
                arcname = file_pyc
            elif _fresh(pycache_pyo):
                # Use the __pycache__/*.pyo file, but write it to the legacy
                # pyo file name in the archive.
                fname = pycache_pyo
                arcname = file_pyo
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    fname = (pycache_pyc if __debug__ else pycache_pyo)
                    arcname = (file_pyc if __debug__ else file_pyo)
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_pyc
                arcname = file_pyc
            else:
                fname = pycache_pyo
                arcname = file_pyo
            if not _fresh(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001449
1450
def main(args=None):
    """Run the command-line interface of this module.

    args is the list of command-line arguments; when it is None,
    sys.argv[1:] is used.  The first argument selects the action:

        -l zipfile           list the archive contents
        -t zipfile           test the archive and report a corrupted member
        -e zipfile target    extract the archive into directory target
        -c zipfile src ...   create the archive from files and directories

    On an unknown action or a wrong number of arguments the usage text is
    printed and the process exits with status 1 (SystemExit).
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        # Let the archive object do the extraction: opening every member
        # name with open(..., 'wb') breaks on directory entries (names
        # ending in "/") and on an empty target directory name.
        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Store a regular file, or recurse into a directory; anything
            # else (e.g. a missing path) is silently ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w', allowZip64=True) as zf:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))


if __name__ == "__main__":
    main()