blob: 1cdfcebc450e2d0e6d33c88493bb38501da9e34f [file] [log] [blame]
"""
Read and write ZIP files.

XXX references to utf-8 need further investigation.
"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import imp
10import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
# zlib and bz2 are optional dependencies: each one is only required when its
# compression method is actually used.  When zlib is missing, binascii
# supplies the CRC-32 implementation instead.
try:
    import zlib
except ImportError:
    zlib = None
    crc32 = binascii.crc32
else:
    crc32 = zlib.crc32

try:
    import bz2
except ImportError:
    bz2 = None
29
# Public API of the module.  Every entry must be followed by a comma:
# adjacent string literals are silently concatenated, which would export a
# single bogus name instead of the two intended ones.
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
Skip Montanaro40fc1602001-03-01 04:27:19 +000033
class BadZipFile(Exception):
    """Error raised for ZIP archives this module cannot process.

    Examples in this file are an archive that spans multiple disks and an
    archive member whose CRC-32 does not match.
    """


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


# Pre-3.2 compatibility names
BadZipfile = BadZipFile
error = BadZipFile
45
Guido van Rossum32abe6f2000-03-31 17:30:02 +000046
# Sizes above ZIP64_LIMIT make ZipInfo.FileHeader() fall back to the ZIP64
# extra field.
ZIP64_LIMIT = 0x7FFFFFFF            # (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 0x10000       # 1 << 16
ZIP_MAX_COMMENT = 0xFFFF            # (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
# Other ZIP compression methods not supported

# Values used for the "create version" / "version needed to extract" header
# fields; ZipInfo.FileHeader() raises them as required by ZIP64 or bzip2.
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
60
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
#     (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the individual fields in the unpacked record.
(
    _ECD_SIGNATURE,            # 0
    _ECD_DISK_NUMBER,          # 1
    _ECD_DISK_START,           # 2
    _ECD_ENTRIES_THIS_DISK,    # 3
    _ECD_ENTRIES_TOTAL,        # 4
    _ECD_SIZE,                 # 5
    _ECD_OFFSET,               # 6
    _ECD_COMMENT_SIZE,         # 7
    # These last two indices are not part of the structure as defined in the
    # spec, but they are used internally by this module as a convenience
    _ECD_COMMENT,              # 8
    _ECD_LOCATION,             # 9
) = range(10)
85
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(
    _CD_SIGNATURE,                   # 0
    _CD_CREATE_VERSION,              # 1
    _CD_CREATE_SYSTEM,               # 2
    _CD_EXTRACT_VERSION,             # 3
    _CD_EXTRACT_SYSTEM,              # 4
    _CD_FLAG_BITS,                   # 5
    _CD_COMPRESS_TYPE,               # 6
    _CD_TIME,                        # 7
    _CD_DATE,                        # 8
    _CD_CRC,                         # 9
    _CD_COMPRESSED_SIZE,             # 10
    _CD_UNCOMPRESSED_SIZE,           # 11
    _CD_FILENAME_LENGTH,             # 12
    _CD_EXTRA_FIELD_LENGTH,          # 13
    _CD_COMMENT_LENGTH,              # 14
    _CD_DISK_NUMBER_START,           # 15
    _CD_INTERNAL_FILE_ATTRIBUTES,    # 16
    _CD_EXTERNAL_FILE_ATTRIBUTES,    # 17
    _CD_LOCAL_HEADER_OFFSET,         # 18
) = range(19)
112
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the individual fields in the unpacked local file header.
(
    _FH_SIGNATURE,                    # 0
    _FH_EXTRACT_VERSION,              # 1
    _FH_EXTRACT_SYSTEM,               # 2
    _FH_GENERAL_PURPOSE_FLAG_BITS,    # 3
    _FH_COMPRESSION_METHOD,           # 4
    _FH_LAST_MOD_TIME,                # 5
    _FH_LAST_MOD_DATE,                # 6
    _FH_CRC,                          # 7
    _FH_COMPRESSED_SIZE,              # 8
    _FH_UNCOMPRESSED_SIZE,            # 9
    _FH_FILENAME_LENGTH,              # 10
    _FH_EXTRA_FIELD_LENGTH,           # 11
) = range(12)
131
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the individual fields in the unpacked ZIP64 record.
(
    _CD64_SIGNATURE,                   # 0
    _CD64_DIRECTORY_RECSIZE,           # 1
    _CD64_CREATE_VERSION,              # 2
    _CD64_EXTRACT_VERSION,             # 3
    _CD64_DISK_NUMBER,                 # 4
    _CD64_DISK_NUMBER_START,           # 5
    _CD64_NUMBER_ENTRIES_THIS_DISK,    # 6
    _CD64_NUMBER_ENTRIES_TOTAL,        # 7
    _CD64_DIRECTORY_SIZE,              # 8
    _CD64_OFFSET_START_CENTDIR,        # 9
) = range(10)
153
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An IOError raised while probing the file is treated as "not a ZIP file"
    and yields False.
    """
    try:
        found = bool(_EndRecData(fp))   # truthy: file has correct magic number
    except IOError:
        return False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000161
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.  Any
    IOError (for example, the file cannot be opened) gives False.
    """
    try:
        if hasattr(filename, "read"):
            # Already a file-like object: probe it directly, leave it open.
            return _check_zipfile(fp=filename)
        with open(filename, "rb") as stream:
            return _check_zipfile(stream)
    except IOError:
        return False
177
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is the open archive.  offset is the position of the regular "end of
    central directory" record, expressed relative to the end of the file (it
    is passed straight to seek() with whence=2).  endrec is the list built by
    _EndRecData(); it is updated in place and returned.

    endrec is returned unchanged when the ZIP64 locator or record is absent,
    truncated, or carries the wrong signature.  BadZipFile is raised for
    archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator in front of the regular
        # end record, so struct.unpack() would fail.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated ZIP64 record: keep the values from the regular record.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
215
216
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.
    None is returned when no complete record can be located."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check keeps struct.unpack() from failing on files that are
    # shorter than one complete record.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record to follow it: the archive is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
        endrec.append(comment)
        endrec.append(maxCommentStart + start)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, maxCommentStart + start - filesize,
                             endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000271
Fred Drake484d7352000-10-02 21:14:52 +0000272
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename is the member name; it is cut at the first null byte and
        the platform path separator is converted to "/".  date_time is a
        (year, month, day, hour, min, sec) sequence.

        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a bytes object.

        The result is the packed local file header followed by the encoded
        file name and the extra field.  As a side effect, extract_version
        and create_version are raised to the minimum version required by
        ZIP64 sizes or bzip2 compression.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        min_version = 0
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            min_version = ZIP64_VERSION

        if self.compress_type == ZIP_BZIP2:
            min_version = max(BZIP2_VERSION, min_version)

        self.extract_version = max(min_version, self.extract_version)
        self.create_version = max(min_version, self.create_version)
        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        ASCII names are encoded as ASCII with the flags unchanged; any other
        name is encoded as UTF-8 and flag bit 0x800 is set.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Decode the extra field, applying any ZIP64 record (type 1).

        file_size, compress_size and header_offset are replaced by the 64-bit
        values of the ZIP64 record when they hold the 0xffffffff placeholder.

        Raises RuntimeError if a ZIP64 record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # A record header needs 4 bytes; shorter trailing data is padding
        # and is ignored instead of making struct.unpack() fail.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000420
421
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = [0] * 256
        for index in range(256):
            value = index
            for _ in range(8):
                shifted = (value >> 1) & 0x7FFFFFFF
                # XOR in the polynomial when the bit shifted out was set.
                value = (shifted ^ poly) if (value & 1) else shifted
            table[index] = value
        return table
    # Built once, while the class body executes, and shared by all instances.
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ch) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        """Initialise the three keys and mix in every item of pwd."""
        self.key0 = 305419896
        self.key1 = 591751049
        self.key2 = 878082192
        for item in pwd:
            self._UpdateKeys(item)

    def _UpdateKeys(self, c):
        """Advance the three keys using the integer byte value c."""
        self.key0 = self._crc32(c, self.key0)
        low_byte = self.key0 & 255
        self.key1 = (self.key1 + low_byte) & 4294967295
        self.key1 = (self.key1 * 134775813 + 1) & 4294967295
        high_byte = (self.key1 >> 24) & 255
        self.key2 = self._crc32(high_byte, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        keystream = ((k * (k ^ 1)) >> 8) & 255
        c = c ^ keystream
        # The keys are updated with the decrypted value.
        self._UpdateKeys(c)
        return c
479
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200480
def _check_compression(compression):
    """Raise RuntimeError unless *compression* is supported and usable.

    ZIP_STORED is always accepted; ZIP_DEFLATED and ZIP_BZIP2 require the
    zlib and bz2 modules respectively.
    """
    if compression == ZIP_STORED:
        return
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        return
    if compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
        return
    raise RuntimeError("That compression method is not supported")
494
495
def _get_compressor(compress_type):
    """Return a new compressor object for compress_type.

    None is returned for any type other than ZIP_DEFLATED and ZIP_BZIP2.
    """
    if compress_type == ZIP_DEFLATED:
        level = zlib.Z_DEFAULT_COMPRESSION
        return zlib.compressobj(level, zlib.DEFLATED, -15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Compressor()
    return None
504
505
def _get_decompressor(compress_type):
    """Return a new decompressor object for compress_type.

    None is returned for any type other than ZIP_DEFLATED and ZIP_BZIP2.
    """
    if compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    return None
513
514
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000515class ZipExtFile(io.BufferedIOBase):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000516 """File-like object for reading an archive member.
517 Is returned by ZipFile.open().
518 """
519
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000520 # Max size supported by decompressor.
521 MAX_N = 1 << 31 - 1
Guido van Rossumd8faa362007-04-27 19:54:29 +0000522
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000523 # Read from compressed files in 4k blocks.
524 MIN_READ_SIZE = 4096
Guido van Rossumd8faa362007-04-27 19:54:29 +0000525
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000526 # Search for universal newlines or line chunks.
527 PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
528
Łukasz Langae94980a2010-11-22 23:31:26 +0000529 def __init__(self, fileobj, mode, zipinfo, decrypter=None,
530 close_fileobj=False):
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000531 self._fileobj = fileobj
532 self._decrypter = decrypter
Łukasz Langae94980a2010-11-22 23:31:26 +0000533 self._close_fileobj = close_fileobj
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000534
Ezio Melotti92b47432010-01-28 01:44:41 +0000535 self._compress_type = zipinfo.compress_type
Ezio Melotti92b47432010-01-28 01:44:41 +0000536 self._compress_left = zipinfo.compress_size
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200537 self._left = zipinfo.file_size
Ezio Melotti92b47432010-01-28 01:44:41 +0000538
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200539 self._decompressor = _get_decompressor(self._compress_type)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000540
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200541 self._eof = False
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000542 self._readbuffer = b''
543 self._offset = 0
544
545 self._universal = 'U' in mode
546 self.newlines = None
547
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000548 # Adjust read size for encrypted files since the first 12 bytes
549 # are for the encryption/password information.
550 if self._decrypter is not None:
551 self._compress_left -= 12
552
553 self.mode = mode
Guido van Rossumd8faa362007-04-27 19:54:29 +0000554 self.name = zipinfo.filename
555
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000556 if hasattr(zipinfo, 'CRC'):
557 self._expected_crc = zipinfo.CRC
558 self._running_crc = crc32(b'') & 0xffffffff
559 else:
560 self._expected_crc = None
561
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000562 def readline(self, limit=-1):
563 """Read and return a line from the stream.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000564
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000565 If limit is specified, at most limit bytes will be read.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000566 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000567
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000568 if not self._universal and limit < 0:
569 # Shortcut common case - newline found in buffer.
570 i = self._readbuffer.find(b'\n', self._offset) + 1
571 if i > 0:
572 line = self._readbuffer[self._offset: i]
573 self._offset = i
574 return line
Guido van Rossumd8faa362007-04-27 19:54:29 +0000575
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000576 if not self._universal:
577 return io.BufferedIOBase.readline(self, limit)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000578
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000579 line = b''
580 while limit < 0 or len(line) < limit:
581 readahead = self.peek(2)
582 if readahead == b'':
583 return line
Guido van Rossumd8faa362007-04-27 19:54:29 +0000584
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000585 #
586 # Search for universal newlines or line chunks.
587 #
588 # The pattern returns either a line chunk or a newline, but not
589 # both. Combined with peek(2), we are assured that the sequence
590 # '\r\n' is always retrieved completely and never split into
591 # separate newlines - '\r', '\n' due to coincidental readaheads.
592 #
593 match = self.PATTERN.search(readahead)
594 newline = match.group('newline')
595 if newline is not None:
596 if self.newlines is None:
597 self.newlines = []
598 if newline not in self.newlines:
599 self.newlines.append(newline)
600 self._offset += len(newline)
601 return line + b'\n'
Guido van Rossumd8faa362007-04-27 19:54:29 +0000602
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000603 chunk = match.group('chunk')
604 if limit >= 0:
605 chunk = chunk[: limit - len(line)]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000606
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000607 self._offset += len(chunk)
608 line += chunk
Guido van Rossumd8faa362007-04-27 19:54:29 +0000609
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000610 return line
611
612 def peek(self, n=1):
613 """Returns buffered bytes without advancing the position."""
614 if n > len(self._readbuffer) - self._offset:
615 chunk = self.read(n)
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200616 if len(chunk) > self._offset:
617 self._readbuffer = chunk + self._readbuffer[self._offset:]
618 self._offset = 0
619 else:
620 self._offset -= len(chunk)
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000621
622 # Return up to 512 bytes to reduce allocation overhead for tight loops.
623 return self._readbuffer[self._offset: self._offset + 512]
624
625 def readable(self):
626 return True
627
628 def read(self, n=-1):
629 """Read and return up to n bytes.
630 If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
Guido van Rossumd8faa362007-04-27 19:54:29 +0000631 """
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200632 if n is None or n < 0:
633 buf = self._readbuffer[self._offset:]
634 self._readbuffer = b''
635 self._offset = 0
636 while not self._eof:
637 buf += self._read1(self.MAX_N)
638 return buf
Guido van Rossumd8faa362007-04-27 19:54:29 +0000639
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200640 n -= len(self._readbuffer) - self._offset
641 if n < 0:
642 buf = self._readbuffer[self._offset:n]
643 self._offset += len(buf)
644 return buf
645
646 buf = self._readbuffer[self._offset:]
647 self._readbuffer = b''
648 self._offset = 0
649 while n > 0 and not self._eof:
650 data = self._read1(n)
651 if n < len(data):
652 self._readbuffer = data
653 self._offset = n
654 buf += data[:n]
655 break
656 buf += data
657 n -= len(data)
658 return buf
659
660 def _update_crc(self, newdata):
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000661 # Update the CRC using the given data.
662 if self._expected_crc is None:
663 # No need to compute the CRC if we don't have a reference value
664 return
665 self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
666 # Check the CRC if we're at the end of the file
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200667 if self._eof and self._running_crc != self._expected_crc:
Georg Brandl4d540882010-10-28 06:42:33 +0000668 raise BadZipFile("Bad CRC-32 for file %r" % self.name)
Antoine Pitrou7c8bcb62010-08-12 15:11:50 +0000669
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000670 def read1(self, n):
671 """Read up to n bytes with at most one read() system call."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000672
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200673 if n is None or n < 0:
674 buf = self._readbuffer[self._offset:]
675 self._readbuffer = b''
676 self._offset = 0
677 data = self._read1(self.MAX_N)
678 buf += data
679 return buf
Guido van Rossumd8faa362007-04-27 19:54:29 +0000680
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200681 n -= len(self._readbuffer) - self._offset
682 if n < 0:
683 buf = self._readbuffer[self._offset:n]
684 self._offset += len(buf)
685 return buf
686
687 buf = self._readbuffer[self._offset:]
688 self._readbuffer = b''
689 self._offset = 0
690 if n > 0:
691 data = self._read1(n)
692 if n < len(data):
693 self._readbuffer = data
694 self._offset = n
695 data = data[:n]
696 buf += data
697 return buf
698
def _read1(self, n):
    # Read up to n compressed bytes with at most one read() system call,
    # decrypt and decompress them.
    #
    # Returns the decompressed bytes (possibly b'') and updates self._eof,
    # self._left and the running CRC as a side effect.
    if self._eof or n <= 0:
        return b''

    # Read from file.
    if self._compress_type == ZIP_DEFLATED:
        ## Handle unconsumed data.
        # Input left over from the previous decompress() call is reused
        # first; the file is only read for the part that is still missing.
        data = self._decompressor.unconsumed_tail
        if n > len(data):
            data += self._read2(n - len(data))
    else:
        data = self._read2(n)

    if self._compress_type == ZIP_STORED:
        # Stored data is returned as is; done once no compressed bytes
        # remain to be read.
        self._eof = self._compress_left <= 0
    elif self._compress_type == ZIP_DEFLATED:
        n = max(n, self.MIN_READ_SIZE)
        # The second argument bounds the size of the output; input that
        # was not consumed is picked up from unconsumed_tail on the next
        # call (see above).
        data = self._decompressor.decompress(data, n)
        # End of member: the decompressor says so, or there is neither
        # unread file data nor unconsumed input left.
        self._eof = (self._decompressor.eof or
                     self._compress_left <= 0 and
                     not self._decompressor.unconsumed_tail)
        if self._eof:
            data += self._decompressor.flush()
    else:
        # Any other compression type: decompress everything that was read.
        data = self._decompressor.decompress(data)
        self._eof = self._decompressor.eof or self._compress_left <= 0

    # Never hand out more than the number of bytes still expected
    # (self._left); reaching zero also marks end of file.
    data = data[:self._left]
    self._left -= len(data)
    if self._left <= 0:
        self._eof = True
    self._update_crc(data)
    return data
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000734
def _read2(self, n):
    """Read raw member bytes from the underlying file and decrypt them.

    At least ``self.MIN_READ_SIZE`` bytes are requested, but never more
    than the compressed bytes still outstanding (``self._compress_left``),
    which is decreased by the amount actually read.  Returns b'' when no
    compressed data is left.
    """
    remaining = self._compress_left
    if remaining <= 0:
        return b''

    want = min(max(n, self.MIN_READ_SIZE), remaining)
    raw = self._fileobj.read(want)
    self._compress_left = remaining - len(raw)

    decrypt = self._decrypter
    if decrypt is None:
        return raw
    # The decrypter is applied to each byte value in turn.
    return bytes(map(decrypt, raw))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000748
def close(self):
    """Close this member reader.

    The underlying file object is closed only when self._close_fileobj is
    true; the base-class close() runs in either case, even if closing the
    file object raises.
    """
    try:
        if self._close_fileobj:
            self._fileobj.close()
    finally:
        super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000755
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000756
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000757class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000758 """ Class with methods to open, read, write, close, list zip files.
759
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000760 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000761
Fred Drake3d9091e2001-03-26 15:49:24 +0000762 file: Either the path to the file, or a file-like object.
763 If it is a path, the file will be opened and closed by ZipFile.
764 mode: The mode can be either read "r", write "w" or append "a".
Martin v. Löwisf6b16a42012-05-01 07:58:44 +0200765 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib) or
766 ZIP_BZIP2 (requires bz2).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000767 allowZip64: if True ZipFile will create files with ZIP64 extensions when
768 needed, otherwise it will raise an exception when this would
769 be necessary.
770
Fred Drake3d9091e2001-03-26 15:49:24 +0000771 """
Fred Drake484d7352000-10-02 21:14:52 +0000772
Fred Drake90eac282001-02-28 05:29:34 +0000773 fp = None # Set here since __del__ checks it
774
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file: path of the archive (str) or an already open file-like object.
    mode: "r", "w" or "a"; anything else raises RuntimeError.
    compression: default compression method for members that are added;
        validated by _check_compression().
    allowZip64: whether ZIP64 extensions may be used when writing.

    When *file* is a path the archive is opened here.  If setting up the
    archive fails afterwards, the file opened here is closed again before
    the exception propagates; a caller-supplied file object is never
    closed by the constructor.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    _check_compression(compression)

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self._comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = io.open(file, modeDict[mode])
        except IOError:
            if mode == 'a':
                # The file cannot be opened for update; fall back to
                # creating it.
                mode = key = 'w'
                self.fp = io.open(file, modeDict[mode])
            else:
                raise
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    try:
        if key == 'r':
            self._GetContents()
        elif key == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        else:
            # key == 'a': the mode was validated above, so no other value
            # can reach this point.
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
    except BaseException:
        # Do not leave a file we opened ourselves dangling when reading
        # the archive fails for any reason (I/O error, corrupt data, ...).
        fp = self.fp
        if fp is not None and not self._filePassed:
            self.fp = None
            fp.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000835
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is the target."""
    return self
838
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    Nothing is returned, so an exception raised inside the block is not
    suppressed.
    """
    self.close()
841
def _GetContents(self):
    """Load the archive directory via _RealGetContents().

    If the data turns out not to be a valid zip file (BadZipFile), the
    file is closed first when it was opened by this object; the exception
    is re-raised in either case.
    """
    try:
        self._RealGetContents()
    except BadZipFile:
        if self._filePassed:
            # The caller owns the file object; leave it alone.
            raise
        self.fp.close()
        self.fp = None
        raise
852
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record, stores the archive
    comment and self.start_dir, then parses every central directory
    record into a ZipInfo that is appended to self.filelist and indexed
    by name in self.NameToInfo.

    Raises BadZipFile if no end record is found, a record does not start
    with the central directory signature, or a record is cut short.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except IOError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory rather than from the file.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # A short record would otherwise surface as struct.error
            # from the unpack() below.
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Same field that is stored as x.flag_bits further down.
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                        t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        # Offsets in the directory do not include data prepended to the
        # archive; shift them by the same amount as the directory itself.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000924
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000925
def namelist(self):
    """Return a new list with the name of every archive member, in
    self.filelist order."""
    names = []
    for info in self.filelist:
        names.append(info.filename)
    return names
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000929
def infolist(self):
    """Return a list of class ZipInfo instances for files in the
    archive.

    The list returned is self.filelist itself, not a copy.
    """
    return self.filelist
934
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header line is followed by one line per member showing its name,
    modification time and uncompressed size.  *file* is handed to print()
    unchanged, so None selects print()'s default output stream.
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000943
944 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000945 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000946 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000947 for zinfo in self.filelist:
948 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000949 # Read by chunks, to avoid an OverflowError or a
950 # MemoryError with very large embedded files.
951 f = self.open(zinfo.filename, "r")
952 while f.read(chunk_size): # Check CRC-32
953 pass
Georg Brandl4d540882010-10-28 06:42:33 +0000954 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000955 return zinfo.filename
956
def getinfo(self, name):
    """Look up *name* in self.NameToInfo and return its ZipInfo.

    Raises KeyError when the archive has no member with that name.
    """
    info = self.NameToInfo.get(name)
    if info is not None:
        return info
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000965
def setpassword(self, pwd):
    """Set the default password used for encrypted members.

    *pwd* must be a bytes object; any false value (None, b'') clears the
    default password.  Raises TypeError for a true non-bytes value.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000974
@property
def comment(self):
    """The comment text associated with the ZIP file.

    Returns the value stored in self._comment.
    """
    return self._comment
979
@comment.setter
def comment(self, comment):
    """Set the archive comment and mark the archive as modified.

    *comment* must be a bytes object (TypeError otherwise).  A comment
    longer than ZIP_MAX_COMMENT bytes is cut down to that length; a
    notice is printed about it when self.debug is set.
    """
    if not isinstance(comment, bytes):
        raise TypeError("comment: expected bytes, got %s" % type(comment))
    # check for valid comment length
    # A comment of exactly ZIP_MAX_COMMENT bytes fits and is not
    # truncated, so it must not be reported as such.
    if len(comment) > ZIP_MAX_COMMENT:
        if self.debug:
            print('Archive comment is too long; truncating to %d bytes'
                  % ZIP_MAX_COMMENT)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    self._didModify = True
992
def read(self, name, pwd=None):
    """Return the complete contents of archive member *name*.

    The member is opened with self.open(name, "r", pwd), read in one go
    and closed again when the with block exits.
    """
    with self.open(name, "r", pwd) as fp:
        return fp.read()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000997
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name: member name, or a ZipInfo instance.
    mode: "r", "U" or "rU"; anything else raises RuntimeError.
    pwd: password (bytes) for an encrypted member; falls back to the
        default stored in self.pwd.

    Raises RuntimeError if the archive is closed, the member is encrypted
    and no password is available, or the password check byte does not
    match; TypeError if *pwd* is not bytes; KeyError for an unknown
    member name; BadZipFile if the local file header is truncated, has a
    bad signature, or names a different file than the directory entry.

    When the archive was opened from a path, a separate file handle is
    opened for the member; it is closed again if anything fails before
    the ZipExtFile has been created.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")
        if len(fheader) != sizeFileHeader:
            # A short header would otherwise surface as struct.error
            # from the unpack() below.
            raise BadZipFile("Truncated file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = list(map(zd, header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file time from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd,
                          close_fileobj=not self._filePassed)
    except BaseException:
        # Single cleanup point: never leak the handle opened above, no
        # matter which check or read failed.
        if not self._filePassed:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001085
def extract(self, member, path=None, pwd=None):
    """Extract one member and return what _extract_member() returns.

    `member' may be a member name or a ZipInfo object; a name is resolved
    with getinfo().  The member is written below `path', which defaults
    to the current working directory.  `pwd' is passed on for encrypted
    members.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
1098 return self._extract_member(member, path, pwd)
1099
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() for each of them.

    `members' defaults to everything returned by namelist(); `path' and
    `pwd' are handed to extract() unchanged for every member.
    """
    targets = self.namelist() if members is None else members
    for item in targets:
        self.extract(item, path, pwd)
1111
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

    Missing parent directories are created.  A member whose name ends in
    '/' is created as a directory.  Returns the normalised path that was
    written.
    """
    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # don't include leading "/" from file name if present
    # NOTE(review): only a single leading "/" is removed; ".." components
    # in the member name are not filtered here, so a hostile archive may
    # be able to write outside targetpath -- confirm with callers.
    if member.filename[0] == '/':
        targetpath = os.path.join(targetpath, member.filename[1:])
    else:
        targetpath = os.path.join(targetpath, member.filename)

    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    if member.filename[-1] == '/':
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    # Both files are closed even when opening the target or copying the
    # data fails part-way.
    with self.open(member, pwd=pwd) as source, \
            open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1148
def _writecheck(self, zinfo):
    """Validate archive state and *zinfo* before a member is written.

    Raises RuntimeError if the archive is not open for writing or has
    already been closed, and LargeZipFile if the member's size or header
    offset exceeds ZIP64_LIMIT while ZIP64 is not allowed.  The
    compression type is validated by _check_compression().  A duplicate
    member name is only reported (when self.debug is set), not rejected.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("Duplicate name:", zinfo.filename)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    _check_compression(zinfo.compress_type)
    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile(
            "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001167
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    arcname defaults to filename; its drive part and leading path
    separators are removed.  compress_type overrides the archive's
    default compression method for this member.  A directory is stored
    as an empty entry whose name ends in '/'.

    Raises RuntimeError if the archive has already been closed, plus
    whatever _writecheck() raises.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # Directory entry: header only, no data.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    cmpr = _get_compressor(zinfo.compress_type)
    with open(filename, "rb") as fp:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        # Stream the file in 8 KiB chunks, updating size and CRC of the
        # uncompressed data as we go.
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    if cmpr:
        # Emit whatever the compressor is still holding back.
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    # (three little-endian 32-bit values, 14 bytes into the header that
    # was written with placeholder zeros above).
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
          zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1244
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Add an in-memory member to the archive.

    'data' is the member's content, either 'bytes' or 'str'; a 'str' is
    encoded as UTF-8 first.  'zinfo_or_arcname' is either a ZipInfo
    instance or the name of the file in the archive; for a plain name a
    ZipInfo stamped with the current local time is created, using the
    archive's default compression.  'compress_type', when given,
    overrides the compression method of the ZipInfo.

    Raises RuntimeError if the archive has already been closed, plus
    whatever _writecheck() raises.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        now = time.localtime(time.time())[:6]
        zinfo = ZipInfo(filename=zinfo_or_arcname, date_time=now)
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)             # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header data
    if compress_type is not None:
        zinfo.compress_type = compress_type

    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum
    compressor = _get_compressor(zinfo.compress_type)
    if compressor:
        data = compressor.compress(data) + compressor.flush()
        zinfo.compress_size = len(data)     # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    zinfo.header_offset = self.fp.tell()    # Start of header data

    out = self.fp
    out.write(zinfo.FileHeader())
    out.write(data)
    out.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        trailer = struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size)
        out.write(trailer)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1289
def __del__(self):
    """Call the "close()" method in case the user forgot.

    Runs when the object is finalised; close() itself does nothing when
    self.fp is already None.
    """
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001293
1294 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001295 """Close the file, and for mode "w" and "a" write the ending
1296 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001297 if self.fp is None:
1298 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001299
1300 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001301 count = 0
1302 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001303 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001304 count = count + 1
1305 dt = zinfo.date_time
1306 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001307 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001308 extra = []
1309 if zinfo.file_size > ZIP64_LIMIT \
1310 or zinfo.compress_size > ZIP64_LIMIT:
1311 extra.append(zinfo.file_size)
1312 extra.append(zinfo.compress_size)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001313 file_size = 0xffffffff
1314 compress_size = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001315 else:
1316 file_size = zinfo.file_size
1317 compress_size = zinfo.compress_size
1318
1319 if zinfo.header_offset > ZIP64_LIMIT:
1320 extra.append(zinfo.header_offset)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001321 header_offset = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001322 else:
1323 header_offset = zinfo.header_offset
1324
1325 extra_data = zinfo.extra
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001326 min_version = 0
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001327 if extra:
1328 # Append a ZIP64 field to the extra's
1329 extra_data = struct.pack(
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001330 '<HH' + 'Q'*len(extra),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001331 1, 8*len(extra), *extra) + extra_data
1332
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001333 min_version = ZIP64_VERSION
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001334
Martin v. Löwisf6b16a42012-05-01 07:58:44 +02001335 if zinfo.compress_type == ZIP_BZIP2:
1336 min_version = max(BZIP2_VERSION, min_version)
1337
1338 extract_version = max(min_version, zinfo.extract_version)
1339 create_version = max(min_version, zinfo.create_version)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001340 try:
1341 filename, flag_bits = zinfo._encodeFilenameFlags()
1342 centdir = struct.pack(structCentralDir,
Ezio Melottiaf30d2e2009-09-25 21:35:24 +00001343 stringCentralDir, create_version,
1344 zinfo.create_system, extract_version, zinfo.reserved,
1345 flag_bits, zinfo.compress_type, dostime, dosdate,
1346 zinfo.CRC, compress_size, file_size,
1347 len(filename), len(extra_data), len(zinfo.comment),
1348 0, zinfo.internal_attr, zinfo.external_attr,
1349 header_offset)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001350 except DeprecationWarning:
Ezio Melottiaf30d2e2009-09-25 21:35:24 +00001351 print((structCentralDir, stringCentralDir, create_version,
1352 zinfo.create_system, extract_version, zinfo.reserved,
1353 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1354 zinfo.CRC, compress_size, file_size,
1355 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1356 0, zinfo.internal_attr, zinfo.external_attr,
1357 header_offset), file=sys.stderr)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001358 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001359 self.fp.write(centdir)
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001360 self.fp.write(filename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001361 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001362 self.fp.write(zinfo.comment)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001363
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001364 pos2 = self.fp.tell()
1365 # Write end-of-zip-archive record
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001366 centDirCount = count
1367 centDirSize = pos2 - pos1
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001368 centDirOffset = pos1
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001369 if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1370 centDirOffset > ZIP64_LIMIT or
1371 centDirSize > ZIP64_LIMIT):
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001372 # Need to write the ZIP64 end-of-archive records
1373 zip64endrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001374 structEndArchive64, stringEndArchive64,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001375 44, 45, 45, 0, 0, centDirCount, centDirCount,
1376 centDirSize, centDirOffset)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001377 self.fp.write(zip64endrec)
1378
1379 zip64locrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001380 structEndArchive64Locator,
1381 stringEndArchive64Locator, 0, pos2, 1)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001382 self.fp.write(zip64locrec)
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001383 centDirCount = min(centDirCount, 0xFFFF)
1384 centDirSize = min(centDirSize, 0xFFFFFFFF)
1385 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001386
Georg Brandl2ee470f2008-07-16 12:55:28 +00001387 endrec = struct.pack(structEndArchive, stringEndArchive,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001388 0, 0, centDirCount, centDirCount,
R David Murrayf50b38a2012-04-12 18:44:58 -04001389 centDirSize, centDirOffset, len(self._comment))
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001390 self.fp.write(endrec)
R David Murrayf50b38a2012-04-12 18:44:58 -04001391 self.fp.write(self._comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001392 self.fp.flush()
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001393
Fred Drake3d9091e2001-03-26 15:49:24 +00001394 if not self._filePassed:
1395 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001396 self.fp = None
1397
1398
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=False, optimize=-1):
        """Open the archive like ZipFile and remember the optimization level.

        optimize == -1 selects the legacy behaviour of _get_codename() (use
        whichever up-to-date compiled file already exists); any other value
        is passed to py_compile as the optimization level.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        basename, if given, is the archive directory prefix under which
        the modules are stored.  Raises RuntimeError when pathname is a
        file that does not end with ".py".
        """
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                # __init__.py was added above; do not add it twice.
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).

        If compilation fails, the plain .py source is returned instead of
        a compiled file.
        """
        def _compile(source, optimize=-1):
            # Byte-compile `source`; report a compile error on stdout and
            # return False so the caller can fall back to the .py file.
            import py_compile
            if self.debug:
                print("Compiling", source)
            try:
                py_compile.compile(source, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py  = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = imp.cache_from_source(file_py, True)
        pycache_pyo = imp.cache_from_source(file_py, False)

        def _up_to_date(compiled):
            # True if `compiled` exists and is not older than the source.
            # The source is only stat'ed when the compiled file exists.
            return (os.path.isfile(compiled) and
                    os.stat(compiled).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present
            if _up_to_date(file_pyo):
                # Use .pyo file.
                arcname = fname = file_pyo
            elif _up_to_date(file_pyc):
                # Use .pyc file.
                arcname = fname = file_pyc
            elif _up_to_date(pycache_pyc):
                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
                # file name in the archive.
                fname = pycache_pyc
                arcname = file_pyc
            elif _up_to_date(pycache_pyo):
                # Use the __pycache__/*.pyo file, but write it to the legacy pyo
                # file name in the archive.
                fname = pycache_pyo
                arcname = file_pyo
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    fname = (pycache_pyc if __debug__ else pycache_pyo)
                    arcname = (file_pyc if __debug__ else file_pyo)
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_pyc
                arcname = file_pyc
            else:
                fname = pycache_pyo
                arcname = file_pyo
            if not _up_to_date(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001541
1542
def main(args = None):
    """Command-line interface: list, test, extract or create a ZIP archive.

    args is the argument list without the program name; when None,
    sys.argv[1:] is used.  The first argument selects the mode
    ('-l', '-t', '-e' or '-c').  On a missing/unknown mode or a wrong
    number of arguments the usage text is printed and the process exits
    with status 1.

    The archive is always closed before returning, also when an operation
    fails.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        out = args[2]
        try:
            # NOTE(review): member names are joined to the target directory
            # as-is; absolute names or names containing ".." are not
            # sanitised here, so only extract archives you trust.
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # Directory entry: create the directory instead of
                    # trying to open it as a file.
                    if tgt and not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is empty when the file goes into the current
                # directory; os.makedirs('') would fail.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively every file below a directory.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1616
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()