blob: 406996d9620e2d34d1b37a8664d5e1c942f530ef [file] [log] [blame]
"""
Read and write ZIP files.

XXX references to utf-8 need further investigation.
"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import imp
10import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
# zlib is optional.  When it is missing, ``zlib`` is set to None (ZipFile
# refuses ZIP_DEFLATED in that case) and CRC-32 values are computed with
# binascii instead.
try:
    import zlib  # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024
# Public names exported by ``from zipfile import *``.
__all__ = ["BadZipFile", "BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000027
class BadZipFile(Exception):
    """Raised when an archive is malformed or uses an unsupported feature."""


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names
39
Guido van Rossum32abe6f2000-03-31 17:30:02 +000040
# Sizes above ZIP64_LIMIT make ZipInfo.FileHeader() fall back to the ZIP64
# extra field.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 1 << 16
# Upper bound for a comment length (fits in a 16-bit field).
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
49
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Indices into the list unpacked with structEndArchive.
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9
73_ECD_LOCATION = 9
74
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
101
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Indices into the tuple unpacked with structFileHeader.
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
120
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000125
# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Indices into the tuple unpacked with structEndArchive64.
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
142
def _check_zipfile(fp):
    """Return True if an "end of central directory" record is found in *fp*.

    *fp* must be a seekable binary file object.  An IOError raised while
    looking for the record is treated as "not a ZIP file".
    """
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except IOError:
        pass
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000150
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    Returns False (rather than raising) when the file cannot be opened or
    read because of an IOError.
    """
    try:
        if hasattr(filename, "read"):
            # Already a file-like object: inspect it in place, do not close it.
            return _check_zipfile(fp=filename)
        with open(filename, "rb") as fp:
            return _check_zipfile(fp)
    except IOError:
        return False
166
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin   -- seekable binary file object positioned anywhere.
    offset -- position of the classic "end of central directory" record,
              expressed relative to the end of the file (so it is <= 0).
    endrec -- list built by _EndRecData(); it is updated in place and
              returned.  It is returned unchanged when no ZIP64 locator or
              record is present.

    Raises BadZipFile for archives that span several disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator, so this cannot be a
        # ZIP64 archive.  Unpacking would raise struct.error.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated ZIP64 record: keep the values from the classic record.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
204
205
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    fpin must be a seekable binary file object.  None is returned when no
    complete record can be located."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check guards against streams that clamp the seek instead of
    # failing on files shorter than the record; unpacking a short buffer
    # would raise struct.error.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record to follow it.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000260
Fred Drake484d7352000-10-02 21:14:52 +0000261
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename  -- member name; it is cut at the first null character and
                     os.sep is replaced by "/".
        date_time -- (year, month, day, hour, min, sec) tuple.

        Raises ValueError when the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as bytes.

        The result is the packed local file header followed by the encoded
        file name and the extra field.  When a size exceeds ZIP64_LIMIT a
        ZIP64 extra record is appended and extract_version/create_version
        are raised to at least 45 on this object.
        """
        dt = self.date_time
        # MS-DOS date/time encoding: years count from 1980, seconds are
        # stored with a two-second resolution.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Include the current create_version so that a value already
            # above the ZIP64 minimum is never lowered.
            self.create_version = max(45, self.extract_version,
                                      self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        ASCII is used when possible; otherwise the name is encoded as UTF-8
        and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Decode the extra field and apply ZIP64 values to this object.

        Records with header id 1 supply 64-bit replacements for file_size,
        compress_size and header_offset when those attributes hold the
        0xffffffff placeholder.  Other records are skipped.

        Raises RuntimeError when a ZIP64 record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (id, length) header; fewer than
        # four trailing bytes cannot form a record and are ignored.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000404
405
Thomas Wouterscf297e42007-02-23 15:07:44 +0000406class _ZipDecrypter:
407 """Class to handle decryption of files stored within a ZIP archive.
408
409 ZIP supports a password-based form of encryption. Even though known
410 plaintext attacks have been found against it, it is still useful
Christian Heimesfdab48e2008-01-20 09:06:41 +0000411 to be able to get data out of such a file.
Thomas Wouterscf297e42007-02-23 15:07:44 +0000412
413 Usage:
414 zd = _ZipDecrypter(mypwd)
415 plain_char = zd(cypher_char)
416 plain_text = map(zd, cypher_text)
417 """
418
419 def _GenerateCRCTable():
420 """Generate a CRC-32 table.
421
422 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
423 internal keys. We noticed that a direct implementation is faster than
424 relying on binascii.crc32().
425 """
426 poly = 0xedb88320
427 table = [0] * 256
428 for i in range(256):
429 crc = i
430 for j in range(8):
431 if crc & 1:
432 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
433 else:
434 crc = ((crc >> 1) & 0x7FFFFFFF)
435 table[i] = crc
436 return table
437 crctable = _GenerateCRCTable()
438
439 def _crc32(self, ch, crc):
440 """Compute the CRC32 primitive on one byte."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000441 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000442
443 def __init__(self, pwd):
444 self.key0 = 305419896
445 self.key1 = 591751049
446 self.key2 = 878082192
447 for p in pwd:
448 self._UpdateKeys(p)
449
450 def _UpdateKeys(self, c):
451 self.key0 = self._crc32(c, self.key0)
452 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
453 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000454 self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000455
456 def __call__(self, c):
457 """Decrypt a single character."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000458 assert isinstance(c, int)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000459 k = self.key2 | 2
460 c = c ^ (((k * (k^1)) >> 8) & 255)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000461 self._UpdateKeys(c)
462 return c
463
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses matter: without
    # them the expression parses as 1 << (31 - 1).
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Wrap *fileobj*, positioned at the start of the member's data.

        fileobj       -- binary file object the raw member data is read from.
        mode          -- mode string; a 'U' in it enables universal newlines
                         in readline().
        zipinfo       -- ZipInfo describing the member.
        decrypter     -- optional callable applied to each raw byte.
        close_fileobj -- when true, close() also closes *fileobj*.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            # Negative window size: raw deflate stream without zlib header.
            self._decompressor = zlib.decompressobj(-15)
        self._unconsumed = b''

        self._readbuffer = b''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        In universal newlines mode every line ending is returned as b'\\n'
        and the endings seen so far are collected in self.newlines.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            # Fill the buffer through read(), then rewind over what it
            # consumed.
            chunk = self.read(n)
            self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: the object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        buf = b''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold *newdata* into the running CRC and verify it when *eof*.

        Raises BadZipFile when the final CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call.

        A negative n or None means "as much as possible".
        """

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None must be tested first: comparing None with 0 raises TypeError.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = bytes(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data

    def close(self):
        """Close this object and, if requested at creation, the source file."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000660
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000661
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000662class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000663 """ Class with methods to open, read, write, close, list zip files.
664
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000665 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000666
Fred Drake3d9091e2001-03-26 15:49:24 +0000667 file: Either the path to the file, or a file-like object.
668 If it is a path, the file will be opened and closed by ZipFile.
669 mode: The mode can be either read "r", write "w" or append "a".
670 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000671 allowZip64: if True ZipFile will create files with ZIP64 extensions when
672 needed, otherwise it will raise an exception when this would
673 be necessary.
674
Fred Drake3d9091e2001-03-26 15:49:24 +0000675 """
Fred Drake484d7352000-10-02 21:14:52 +0000676
Fred Drake90eac282001-02-28 05:29:34 +0000677 fp = None # Set here since __del__ checks it
678
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file        -- path of the archive (a str) or an already open
                   file-like object.  A path is opened here and is owned
                   (and later closed) by this object.
    mode        -- "r", "w" or "a".
    compression -- ZIP_STORED or ZIP_DEFLATED (the latter needs zlib).
    allowZip64  -- stored as-is; consulted when writing large archives.

    Raises RuntimeError for an invalid mode or compression method, and
    BadZipFile when mode is "r" and the file is not a ZIP archive.  If
    reading the existing directory fails for any reason, a file that was
    opened here is closed again before the exception propagates.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    else:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self._comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = io.open(file, modeDict[mode])
        except IOError:
            if mode == 'a':
                # Appending to a file that cannot be opened for update:
                # fall back to creating it.
                mode = key = 'w'
                self.fp = io.open(file, modeDict[mode])
            else:
                raise
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    try:
        if key == 'r':
            self._GetContents()
        elif key == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        elif key == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')
    except BaseException:
        # Do not leave a file we opened ourselves dangling until garbage
        # collection.  _GetContents() may already have closed it, in which
        # case self.fp is None.  Caller-supplied files are left untouched.
        if not self._filePassed and self.fp is not None:
            self.fp.close()
            self.fp = None
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000746
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is the target."""
    return self
749
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    The exception details are ignored and nothing is returned, so an
    exception raised inside the block keeps propagating.
    """
    self.close()
752
def _GetContents(self):
    """Read the directory via _RealGetContents().

    If the format is bad (BadZipFile), a file that was opened by this
    object is closed and ``self.fp`` reset before the error is re-raised.
    A file object supplied by the caller is left open.
    """
    try:
        self._RealGetContents()
    except BadZipFile:
        owns_file = not self._filePassed
        if owns_file:
            self.fp.close()
            self.fp = None
        raise
763
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record, stores the archive
    comment and ``self.start_dir``, then parses every central directory
    record into a ZipInfo appended to ``self.filelist`` and indexed by
    name in ``self.NameToInfo``.

    Raises BadZipFile when the end record cannot be found, when a record
    is truncated, or when a record has the wrong magic number.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except IOError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory rather than from the archive file.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        # A short read would otherwise surface as struct.error from the
        # unpack below; report it as a malformed archive instead.
        if len(centdir) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        if centdir[0:4] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        x._decodeExtra()
        # Shift by the size of whatever was prepended to the archive.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000835
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000836
def namelist(self):
    """Return a new list with the name of every archive member, in the
    order the members appear in ``self.filelist``."""
    return [zinfo.filename for zinfo in self.filelist]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000840
def infolist(self):
    """Return the list of ZipInfo instances for the files in the archive.

    This is the internal ``filelist`` object itself, not a copy.
    """
    return self.filelist
845
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header line is written, then one line per member giving its
    name, modification time and uncompressed size.  Output goes to
    `file`, or to print()'s default stream when `file` is None.
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000854
def testzip(self):
    """Read all the files and check the CRC.

    Returns the name of the first member whose read raises BadZipFile,
    or None when every member reads cleanly.
    """
    chunk_size = 2 ** 20
    for zinfo in self.filelist:
        try:
            # Read by chunks, to avoid an OverflowError or a
            # MemoryError with very large embedded files.
            # The stream is closed on every path so that checking a
            # large archive does not accumulate open file handles.
            with self.open(zinfo.filename, "r") as f:
                while f.read(chunk_size):     # Check CRC-32
                    pass
        except BadZipFile:
            return zinfo.filename
867
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when the archive has no member called 'name'.
    """
    info = self.NameToInfo.get(name)
    if info is not None:
        return info
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000876
def setpassword(self, pwd):
    """Set default password for encrypted files.

    `pwd` must be a bytes object; any false value (None, b'') clears the
    stored password.  Raises TypeError for a non-empty non-bytes value.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000885
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Set the archive comment.  Only bytes are accepted; anything else
    # raises TypeError.  Assigning marks the archive as modified.
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment))
    # check for valid comment length
    # A comment of exactly ZIP_MAX_COMMENT bytes fits and is stored
    # unchanged; only longer comments are truncated (and reported when
    # debugging is enabled).
    if len(comment) > ZIP_MAX_COMMENT:
        if self.debug:
            print('Archive comment is too long; truncating to %d bytes'
                  % ZIP_MAX_COMMENT)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    self._didModify = True
903
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    The member is opened in mode "r" with the optional password `pwd`,
    read completely, and closed again before returning.
    """
    with self.open(name, "r", pwd) as member:
        contents = member.read()
    return contents
Guido van Rossumd8faa362007-04-27 19:54:29 +0000908
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name -- member name or ZipInfo instance.
    mode -- "r", "U" or "rU".
    pwd  -- password (bytes) for encrypted members; falls back to the
            password set with setpassword().

    Raises RuntimeError for a bad mode, a closed archive, or a missing or
    wrong password; TypeError for a non-bytes password; KeyError for an
    unknown member name; BadZipFile for a corrupt local file header.

    When the archive was opened from a path, a separate file handle is
    opened for the member.  That handle is closed again if anything goes
    wrong before the member stream has been constructed.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        # A short read would otherwise surface as struct.error from the
        # unpack below; report it as a malformed archive instead.
        if len(fheader) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        if fheader[0:4] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = list(map(zd, header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file time from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd,
                          close_fileobj=not self._filePassed)
    except BaseException:
        # Single cleanup point: never leak the handle we opened above,
        # whatever went wrong.  A caller-supplied file stays open.
        if not self._filePassed:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000996
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
    using its full name. Its file information is extracted as accurately
    as possible. `member' may be a filename or a ZipInfo object. You can
    specify a different directory using `path'.

    Returns whatever _extract_member() returns for the member.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
1010
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
    directory. `path' specifies a different directory to extract to.
    `members' is optional and must be a subset of the list returned
    by namelist().

    Each selected member is handed to extract() together with `path`
    and the password `pwd`.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
1022
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

    Missing parent directories are created.  A member whose name ends
    with '/' is created as a directory.  Returns the normalised path
    that was written.

    NOTE(review): member names come from the archive and are joined onto
    targetpath without filtering ".." components; callers extracting
    untrusted archives should inspect member names first.
    """
    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # don't include leading "/" from file name if present
    if member.filename[0] == '/':
        targetpath = os.path.join(targetpath, member.filename[1:])
    else:
        targetpath = os.path.join(targetpath, member.filename)

    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    if member.filename[-1] == '/':
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    # Both streams are closed even when opening the target or copying
    # the data fails part-way.
    with self.open(member, pwd=pwd) as source, \
            open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1059
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Raises RuntimeError when the archive is not writable, is closed, or
    the member's compression method is unavailable or unsupported, and
    LargeZipFile when the member size or header offset exceeds
    ZIP64_LIMIT while ZIP64 extensions are not allowed.  A duplicate
    member name is only reported (when debugging is enabled).
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("Duplicate name:", zinfo.filename)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    if zinfo.compress_type == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError("That compression method is not supported")
    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile(
            "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001082
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    filename      -- path of the file or directory on disk.
    arcname       -- name inside the archive; defaults to `filename`.
                     Any drive prefix and leading separators are dropped.
    compress_type -- overrides the archive's default compression.

    A directory is stored as an empty entry whose name ends with '/'.
    Raises RuntimeError when the archive is already closed; further
    checks are performed by _writecheck().
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    date_time = time.localtime(st.st_mtime)[0:6]

    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    separators = (os.sep, os.altsep)
    while arcname[0] in separators:
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # Directories carry no data: register the entry and emit only
        # its header.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    with open(filename, "rb") as src:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None
        while True:
            chunk = src.read(1024 * 8)
            if not chunk:
                break
            file_size += len(chunk)
            CRC = crc32(chunk, CRC) & 0xffffffff
            if cmpr:
                chunk = cmpr.compress(chunk)
                compress_size += len(chunk)
            self.fp.write(chunk)

    if cmpr:
        # Emit whatever the compressor is still buffering.
        tail = cmpr.flush()
        compress_size += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size

    # Seek backwards and write CRC and file sizes
    resume_at = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(resume_at, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1163
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    'compress_type', when given, overrides the compression method of
    the entry.  Raises RuntimeError when the archive is already closed;
    further checks are performed by _writecheck().
    """
    if isinstance(data, str):
        data = data.encode("utf-8")

    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        # Build a fresh entry stamped with the current local time.
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)            # Uncompressed size
    zinfo.header_offset = self.fp.tell()   # Start of header data
    if compress_type is not None:
        zinfo.compress_type = compress_type

    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff   # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        deflater = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        data = deflater.compress(data) + deflater.flush()
        zinfo.compress_size = len(data)    # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    zinfo.header_offset = self.fp.tell()   # Start of header data
    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        descriptor = struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                 zinfo.file_size)
        self.fp.write(descriptor)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1209
def __del__(self):
    """Call the "close()" method in case the user forgot.

    Runs when the object is finalised; it simply delegates to close().
    """
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001213
1214 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001215 """Close the file, and for mode "w" and "a" write the ending
1216 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001217 if self.fp is None:
1218 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001219
1220 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001221 count = 0
1222 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001223 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001224 count = count + 1
1225 dt = zinfo.date_time
1226 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001227 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001228 extra = []
1229 if zinfo.file_size > ZIP64_LIMIT \
1230 or zinfo.compress_size > ZIP64_LIMIT:
1231 extra.append(zinfo.file_size)
1232 extra.append(zinfo.compress_size)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001233 file_size = 0xffffffff
1234 compress_size = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001235 else:
1236 file_size = zinfo.file_size
1237 compress_size = zinfo.compress_size
1238
1239 if zinfo.header_offset > ZIP64_LIMIT:
1240 extra.append(zinfo.header_offset)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001241 header_offset = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001242 else:
1243 header_offset = zinfo.header_offset
1244
1245 extra_data = zinfo.extra
1246 if extra:
1247 # Append a ZIP64 field to the extra's
1248 extra_data = struct.pack(
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001249 '<HH' + 'Q'*len(extra),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001250 1, 8*len(extra), *extra) + extra_data
1251
1252 extract_version = max(45, zinfo.extract_version)
1253 create_version = max(45, zinfo.create_version)
1254 else:
1255 extract_version = zinfo.extract_version
1256 create_version = zinfo.create_version
1257
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001258 try:
1259 filename, flag_bits = zinfo._encodeFilenameFlags()
1260 centdir = struct.pack(structCentralDir,
Ezio Melottiaf30d2e2009-09-25 21:35:24 +00001261 stringCentralDir, create_version,
1262 zinfo.create_system, extract_version, zinfo.reserved,
1263 flag_bits, zinfo.compress_type, dostime, dosdate,
1264 zinfo.CRC, compress_size, file_size,
1265 len(filename), len(extra_data), len(zinfo.comment),
1266 0, zinfo.internal_attr, zinfo.external_attr,
1267 header_offset)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001268 except DeprecationWarning:
Ezio Melottiaf30d2e2009-09-25 21:35:24 +00001269 print((structCentralDir, stringCentralDir, create_version,
1270 zinfo.create_system, extract_version, zinfo.reserved,
1271 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1272 zinfo.CRC, compress_size, file_size,
1273 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1274 0, zinfo.internal_attr, zinfo.external_attr,
1275 header_offset), file=sys.stderr)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001276 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001277 self.fp.write(centdir)
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001278 self.fp.write(filename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001279 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001280 self.fp.write(zinfo.comment)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001281
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001282 pos2 = self.fp.tell()
1283 # Write end-of-zip-archive record
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001284 centDirCount = count
1285 centDirSize = pos2 - pos1
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001286 centDirOffset = pos1
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001287 if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1288 centDirOffset > ZIP64_LIMIT or
1289 centDirSize > ZIP64_LIMIT):
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001290 # Need to write the ZIP64 end-of-archive records
1291 zip64endrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001292 structEndArchive64, stringEndArchive64,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001293 44, 45, 45, 0, 0, centDirCount, centDirCount,
1294 centDirSize, centDirOffset)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001295 self.fp.write(zip64endrec)
1296
1297 zip64locrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001298 structEndArchive64Locator,
1299 stringEndArchive64Locator, 0, pos2, 1)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001300 self.fp.write(zip64locrec)
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001301 centDirCount = min(centDirCount, 0xFFFF)
1302 centDirSize = min(centDirSize, 0xFFFFFFFF)
1303 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001304
Georg Brandl2ee470f2008-07-16 12:55:28 +00001305 endrec = struct.pack(structEndArchive, stringEndArchive,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001306 0, 0, centDirCount, centDirCount,
R David Murrayf50b38a2012-04-12 18:44:58 -04001307 centDirSize, centDirOffset, len(self._comment))
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001308 self.fp.write(endrec)
R David Murrayf50b38a2012-04-12 18:44:58 -04001309 self.fp.write(self._comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001310 self.fp.flush()
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001311
Fred Drake3d9091e2001-03-26 15:49:24 +00001312 if not self._filePassed:
1313 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001314 self.fp = None
1315
1316
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=False, optimize=-1):
        """Open the archive like ZipFile and remember the optimization level.

        file, mode, compression and allowZip64 are passed unchanged to
        ZipFile.__init__().  optimize selects which compiled file
        writepy() stores: -1 (the default) uses whatever compiled file is
        already present and up to date, 0 stores a .pyc file and any
        other value stores a .pyo file.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are normally module.pyo or module.pyc;
        this method compiles module.py if necessary and only stores the
        module.py source itself when that compilation fails.

        basename, when not empty, is prepended (followed by "/") to the
        names stored in the archive.

        Raises RuntimeError if pathname is a file not ending in ".py".
        """
        # Only the last path component is needed (it names the package).
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                # __init__.py has just been added; do not add it twice.
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path (without the ".py" suffix), return the
        file to read and the name to store it under in the archive,
        compiling if necessary.  For example, given /python/lib/string,
        return (/python/lib/string.pyc, string.pyc).  If basename is not
        empty the archive name is prefixed with "basename/".  When the
        source cannot be compiled, the ".py" file itself is returned.
        """
        def _compile(file, optimize=-1):
            """Compile file; return True on success, False on failure."""
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                # Report the problem and let the caller fall back to the
                # plain source file instead of aborting the whole archive.
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = imp.cache_from_source(file_py, True)
        pycache_pyo = imp.cache_from_source(file_py, False)
        if self._optimize == -1:
            # legacy mode: use whatever file is present
            if (os.path.isfile(file_pyo) and
                    os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime):
                # Use .pyo file.
                arcname = fname = file_pyo
            elif (os.path.isfile(file_pyc) and
                    os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
                # Use .pyc file.
                arcname = fname = file_pyc
            elif (os.path.isfile(pycache_pyc) and
                    os.stat(pycache_pyc).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyc file, but write it to the legacy
                # pyc file name in the archive.
                fname = pycache_pyc
                arcname = file_pyc
            elif (os.path.isfile(pycache_pyo) and
                    os.stat(pycache_pyo).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyo file, but write it to the legacy
                # pyo file name in the archive.
                fname = pycache_pyo
                arcname = file_pyo
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    fname = (pycache_pyc if __debug__ else pycache_pyo)
                    arcname = (file_pyc if __debug__ else file_pyo)
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_pyc
                arcname = file_pyc
            else:
                fname = pycache_pyo
                arcname = file_pyo
            # (Re)compile when the cached file is missing or older than
            # the source; fall back to the source if that fails.
            if not (os.path.isfile(fname) and
                    os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001459
1460
def main(args=None):
    """Run the simple command-line interface of this module.

    args is the list of command-line arguments without the program name;
    when it is None, sys.argv[1:] is used.  The first argument selects
    the action:

        -l zipfile          print a listing of the archive
        -t zipfile          test the archive and report a corrupted member
        -e zipfile target   extract all members below the target directory
        -c zipfile src ...  create the archive from files and directories

    If the option is missing or unknown, or the number of arguments does
    not fit the option, the usage text is printed and sys.exit(1) is
    called.  The archive is always closed, even when an error occurs.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # Directory entry: there is no data to write, just
                    # make sure the directory exists.
                    if tgt and not os.path.exists(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is empty when extracting into the current
                # directory; os.makedirs('') would fail.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            """Add file path as zippath; recurse into directories."""
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1534
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()