blob: 7619cfee78d3de83ee873a655191daad41073f18 [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00005import binascii, cStringIO, stat
Antoine Pitrou94c33eb2010-01-27 20:59:50 +00006import io
7import re
Serhiy Storchaka7c068752013-02-02 12:30:49 +02008import string
Guido van Rossum32abe6f2000-03-31 17:30:02 +00009
10try:
Tim Peterse1190062001-01-15 03:34:38 +000011 import zlib # We may need its compression method
Gregory P. Smithb89a0962008-03-19 01:46:10 +000012 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000013except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000014 zlib = None
Gregory P. Smithb89a0962008-03-19 01:46:10 +000015 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000016
# Public names exported by "from <this module> import *".
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000019
class BadZipfile(Exception):
    """The exception raised by this module for invalid or unsupported archives."""
Ronald Oussoren143cefb2006-06-15 08:14:18 +000022
23
class LargeZipFile(Exception):
    """
    Raised while writing a zipfile when the archive would require the
    ZIP64 extensions but those extensions are disabled.
    """
29
error = BadZipfile      # The exception raised by this module

# Sizes larger than ZIP64_LIMIT make ZipInfo.FileHeader() switch to (or
# demand) the ZIP64 extension.
ZIP64_LIMIT = (1 << 31) - 1
# Both of the following equal 0xFFFF, the largest unsigned 16-bit value.
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
40
Martin v. Löwis8c436412008-07-03 12:51:14 +000041# Below are some formats and associated data for reading/writing headers using
42# the struct module. The names and structures of headers/records are those used
43# in the PKWARE description of the ZIP file format:
44# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
45# (URL valid as of January 2008)
Ronald Oussoren143cefb2006-06-15 08:14:18 +000046
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# Format "<4s4H2LH": little-endian with standard sizes -- a 4-byte signature,
# four 16-bit fields, two 32-bit fields and one 16-bit field (22 bytes).
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the unpacked fields inside the end-record list.
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData appends the comment and the record's file offset).
_ECD_COMMENT = 8
_ECD_LOCATION = 9
65
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
# Format "<4s4B4HL2L5H2L": little-endian with standard sizes, 19 fields
# and 46 bytes in total.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
92
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# Format "<4s2B4HL2L2H": little-endian with standard sizes, 12 fields
# and 30 bytes in total.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the unpacked fields of a local file header.
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
111
# The "Zip64 end of central directory locator" structure, magic number, and size
# Format "<4sLQL": 4-byte signature, 32-bit, 64-bit and 32-bit fields (20 bytes).
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# Format "<4sQ2H2L4Q": little-endian with standard sizes, 10 fields and
# 56 bytes in total.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the unpacked fields of the Zip64 end-of-central-directory record.
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
133
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in fp.

    An IOError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        endrec = _EndRecData(fp)
    except IOError:
        return False
    # A truthy record means the file has the correct magic number.
    return bool(endrec)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000141
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path, or a file or file-like object
    (anything with a "read" attribute).  A path is opened in binary mode
    and closed again; an IOError while opening or probing yields False.
    """
    try:
        if hasattr(filename, "read"):
            return _check_zipfile(fp=filename)
        with open(filename, "rb") as fp:
            return _check_zipfile(fp)
    except IOError:
        return False
157
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    offset is the position of the classic end record relative to the end
    of the file.  endrec is returned unchanged when no ZIP64 locator or
    record is found; BadZipfile is raised for multi-disk archives.
    """
    locator_offset = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_offset, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator,
                                               locator)
    if sig != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks == 1):
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data': the record sits directly in front
    # of the locator.
    fpin.seek(locator_offset - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    fields = struct.unpack(structEndArchive64, record)
    if fields[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = fields[_CD64_SIGNATURE]
    endrec[_ECD_DISK_NUMBER] = fields[_CD64_DISK_NUMBER]
    endrec[_ECD_DISK_START] = fields[_CD64_DISK_NUMBER_START]
    endrec[_ECD_ENTRIES_THIS_DISK] = fields[_CD64_NUMBER_ENTRIES_THIS_DISK]
    endrec[_ECD_ENTRIES_TOTAL] = fields[_CD64_NUMBER_ENTRIES_TOTAL]
    endrec[_ECD_SIZE] = fields[_CD64_DIRECTORY_SIZE]
    endrec[_ECD_OFFSET] = fields[_CD64_OFFSET_START_CENTDIR]
    return endrec
199
200
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # First try the common case of an archive without a comment: the
    # "end of central directory" structure is then the last item in the file.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    tail = fpin.read()
    no_comment = (len(tail) == sizeEndCentDir and
                  tail[0:4] == stringEndArchive and
                  tail[-2:] == b"\000\000")
    if no_comment:
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, tail))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    window_start = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(window_start, 0)
    window = fpin.read()
    sig_pos = window.rfind(stringEndArchive)
    if sig_pos < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    rec_end = sig_pos + sizeEndCentDir
    raw_record = window[sig_pos:rec_end]
    if len(raw_record) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, raw_record))
    comment_size = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    endrec.append(window[rec_end:rec_end + comment_size])
    endrec.append(window_start + sig_pos)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, window_start + sig_pos - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000260
Fred Drake484d7352000-10-02 21:14:52 +0000261
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename is truncated at the first null byte and has os.sep
        replaced by "/".  date_time is a (year, month, day, hour, min, sec)
        tuple; a year before 1980 raises ValueError.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self, zip64=None):
        """Return the per-file header as a string.

        zip64 selects the ZIP64 handling: None decides from the sizes,
        a true value always appends the ZIP64 extra record, and a false
        value raises LargeZipFile when a size exceeds ZIP64_LIMIT.
        When the sizes do exceed the limit, extract_version and
        create_version are raised to at least 45 as a side effect.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Each version is only ever raised: take the maximum against
            # create_version itself so a higher value set by the caller is
            # not replaced by extract_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        A unicode name is encoded as ASCII when possible, otherwise as
        UTF-8 with flag bit 0x800 set; other names pass through unchanged.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply ZIP64 values found in the extra field to this object.

        Walks the (type, length, data) records of self.extra.  For a type 1
        (ZIP64) record, file_size, compress_size and header_offset are
        replaced, in that order, when they hold the 0xffffffff placeholder.
        Raises RuntimeError for a ZIP64 record of unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            # Skip this record's 4-byte header and its payload.
            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000418
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000419
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000420class _ZipDecrypter:
421 """Class to handle decryption of files stored within a ZIP archive.
422
423 ZIP supports a password-based form of encryption. Even though known
424 plaintext attacks have been found against it, it is still useful
Gregory P. Smithda407232008-01-20 01:32:00 +0000425 to be able to get data out of such a file.
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000426
427 Usage:
428 zd = _ZipDecrypter(mypwd)
429 plain_char = zd(cypher_char)
430 plain_text = map(zd, cypher_text)
431 """
432
433 def _GenerateCRCTable():
434 """Generate a CRC-32 table.
435
436 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
437 internal keys. We noticed that a direct implementation is faster than
438 relying on binascii.crc32().
439 """
440 poly = 0xedb88320
441 table = [0] * 256
442 for i in range(256):
443 crc = i
444 for j in range(8):
445 if crc & 1:
446 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
447 else:
448 crc = ((crc >> 1) & 0x7FFFFFFF)
449 table[i] = crc
450 return table
451 crctable = _GenerateCRCTable()
452
453 def _crc32(self, ch, crc):
454 """Compute the CRC32 primitive on one byte."""
455 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
456
457 def __init__(self, pwd):
458 self.key0 = 305419896
459 self.key1 = 591751049
460 self.key2 = 878082192
461 for p in pwd:
462 self._UpdateKeys(p)
463
464 def _UpdateKeys(self, c):
465 self.key0 = self._crc32(c, self.key0)
466 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
467 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
468 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
469
470 def __call__(self, c):
471 """Decrypt a single character."""
472 c = ord(c)
473 k = self.key2 | 2
474 c = c ^ (((k * (k^1)) >> 8) & 255)
475 c = chr(c)
476 self._UpdateKeys(c)
477 return c
478
Ezio Melotti9e949722012-11-18 13:18:06 +0200479
# Maps the numeric compression-method code of an archive member to a
# readable name.  ZipExtFile uses it to describe methods it cannot
# decompress in its NotImplementedError message.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
499
500
Serhiy Storchaka45aa7712014-12-03 09:11:12 +0200501class _SharedFile:
502 def __init__(self, file, pos, close):
503 self._file = file
504 self._pos = pos
505 self._close = close
506
507 def read(self, n=-1):
508 self._file.seek(self._pos)
509 data = self._file.read(n)
510 self._pos = self._file.tell()
511 return data
512
513 def close(self):
514 if self._file is not None:
515 fileobj = self._file
516 self._file = None
517 self._close(fileobj)
518
519
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000520class ZipExtFile(io.BufferedIOBase):
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000521 """File-like object for reading an archive member.
Tim Petersea5962f2007-03-12 18:07:52 +0000522 Is returned by ZipFile.open().
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000523 """
Tim Petersea5962f2007-03-12 18:07:52 +0000524
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000525 # Max size supported by decompressor.
526 MAX_N = 1 << 31 - 1
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000527
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000528 # Read from compressed files in 4k blocks.
529 MIN_READ_SIZE = 4096
Tim Petersea5962f2007-03-12 18:07:52 +0000530
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000531 # Search for universal newlines or line chunks.
532 PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
533
Jesus Cea93d628b2012-11-04 02:32:08 +0100534 def __init__(self, fileobj, mode, zipinfo, decrypter=None,
535 close_fileobj=False):
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000536 self._fileobj = fileobj
537 self._decrypter = decrypter
Jesus Cea93d628b2012-11-04 02:32:08 +0100538 self._close_fileobj = close_fileobj
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000539
Ezio Melotti4611b052010-01-28 01:41:30 +0000540 self._compress_type = zipinfo.compress_type
541 self._compress_size = zipinfo.compress_size
542 self._compress_left = zipinfo.compress_size
543
544 if self._compress_type == ZIP_DEFLATED:
545 self._decompressor = zlib.decompressobj(-15)
Ezio Melotti9e949722012-11-18 13:18:06 +0200546 elif self._compress_type != ZIP_STORED:
547 descr = compressor_names.get(self._compress_type)
548 if descr:
549 raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr))
550 else:
551 raise NotImplementedError("compression type %d" % (self._compress_type,))
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000552 self._unconsumed = ''
553
554 self._readbuffer = ''
555 self._offset = 0
556
557 self._universal = 'U' in mode
558 self.newlines = None
559
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000560 # Adjust read size for encrypted files since the first 12 bytes
561 # are for the encryption/password information.
562 if self._decrypter is not None:
563 self._compress_left -= 12
564
565 self.mode = mode
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000566 self.name = zipinfo.filename
567
Antoine Pitroue1436d12010-08-12 15:25:51 +0000568 if hasattr(zipinfo, 'CRC'):
569 self._expected_crc = zipinfo.CRC
570 self._running_crc = crc32(b'') & 0xffffffff
571 else:
572 self._expected_crc = None
573
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000574 def readline(self, limit=-1):
575 """Read and return a line from the stream.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000576
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000577 If limit is specified, at most limit bytes will be read.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000578 """
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000579
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000580 if not self._universal and limit < 0:
581 # Shortcut common case - newline found in buffer.
582 i = self._readbuffer.find('\n', self._offset) + 1
583 if i > 0:
584 line = self._readbuffer[self._offset: i]
585 self._offset = i
586 return line
Tim Petersea5962f2007-03-12 18:07:52 +0000587
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000588 if not self._universal:
589 return io.BufferedIOBase.readline(self, limit)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000590
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000591 line = ''
592 while limit < 0 or len(line) < limit:
593 readahead = self.peek(2)
594 if readahead == '':
595 return line
Tim Petersea5962f2007-03-12 18:07:52 +0000596
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000597 #
598 # Search for universal newlines or line chunks.
599 #
600 # The pattern returns either a line chunk or a newline, but not
601 # both. Combined with peek(2), we are assured that the sequence
602 # '\r\n' is always retrieved completely and never split into
603 # separate newlines - '\r', '\n' due to coincidental readaheads.
604 #
605 match = self.PATTERN.search(readahead)
606 newline = match.group('newline')
607 if newline is not None:
608 if self.newlines is None:
609 self.newlines = []
610 if newline not in self.newlines:
611 self.newlines.append(newline)
612 self._offset += len(newline)
613 return line + '\n'
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000614
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000615 chunk = match.group('chunk')
616 if limit >= 0:
617 chunk = chunk[: limit - len(line)]
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000618
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000619 self._offset += len(chunk)
620 line += chunk
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000621
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000622 return line
623
624 def peek(self, n=1):
625 """Returns buffered bytes without advancing the position."""
626 if n > len(self._readbuffer) - self._offset:
627 chunk = self.read(n)
Serhiy Storchakad1051962013-12-21 23:51:15 +0200628 if len(chunk) > self._offset:
629 self._readbuffer = chunk + self._readbuffer[self._offset:]
630 self._offset = 0
631 else:
632 self._offset -= len(chunk)
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000633
634 # Return up to 512 bytes to reduce allocation overhead for tight loops.
635 return self._readbuffer[self._offset: self._offset + 512]
636
637 def readable(self):
638 return True
639
640 def read(self, n=-1):
641 """Read and return up to n bytes.
642 If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000643 """
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000644 buf = ''
Antoine Pitroue4195e82010-09-12 14:56:27 +0000645 if n is None:
646 n = -1
647 while True:
648 if n < 0:
649 data = self.read1(n)
650 elif n > len(buf):
651 data = self.read1(n - len(buf))
652 else:
653 return buf
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000654 if len(data) == 0:
655 return buf
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000656 buf += data
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000657
Antoine Pitroue1436d12010-08-12 15:25:51 +0000658 def _update_crc(self, newdata, eof):
659 # Update the CRC using the given data.
660 if self._expected_crc is None:
661 # No need to compute the CRC if we don't have a reference value
662 return
663 self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
664 # Check the CRC if we're at the end of the file
665 if eof and self._running_crc != self._expected_crc:
666 raise BadZipfile("Bad CRC-32 for file %r" % self.name)
667
def read1(self, n):
    """Read up to n bytes with at most one read() system call.

    A negative n, or None, means "as much as possible" and is replaced
    by self.MAX_N.  Data is served from the internal read buffer; when
    the buffer cannot satisfy the request, a single read() is issued on
    the underlying file object and the result is decrypted and/or
    decompressed before being appended to the buffer.

    Returns the data read, which may be shorter than n and is empty once
    the member is exhausted.  The running CRC is updated through
    self._update_crc() as new data arrives.
    """
    # Simplify algorithm (branching) by transforming negative n to large n.
    # None is tested first so it is never ordered against an integer.
    if n is None or n < 0:
        n = self.MAX_N

    # Bytes available in read buffer.
    len_readbuffer = len(self._readbuffer) - self._offset

    # Read from file.
    if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
        nbytes = n - len_readbuffer - len(self._unconsumed)
        nbytes = max(nbytes, self.MIN_READ_SIZE)
        nbytes = min(nbytes, self._compress_left)

        data = self._fileobj.read(nbytes)
        self._compress_left -= len(data)

        if data and self._decrypter is not None:
            data = ''.join(map(self._decrypter, data))

        if self._compress_type == ZIP_STORED:
            self._update_crc(data, eof=(self._compress_left == 0))
            # Drop the already-consumed prefix while appending new data.
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0
        else:
            # Prepare deflated bytes for decompression.
            self._unconsumed += data

    # Handle unconsumed data.
    if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
        data = self._decompressor.decompress(
            self._unconsumed,
            max(n - len_readbuffer, self.MIN_READ_SIZE)
        )

        self._unconsumed = self._decompressor.unconsumed_tail
        # End of member: nothing left on disk and nothing left to inflate.
        eof = len(self._unconsumed) == 0 and self._compress_left == 0
        if eof:
            data += self._decompressor.flush()

        self._update_crc(data, eof=eof)
        self._readbuffer = self._readbuffer[self._offset:] + data
        self._offset = 0

    # Read from buffer.
    data = self._readbuffer[self._offset: self._offset + n]
    self._offset += len(data)
    return data
719
def close(self):
    """Close this member stream.

    The underlying file object is closed only when self._close_fileobj
    is true; the base-class close() runs in every case, even if closing
    the underlying file object raises.
    """
    try:
        if self._close_fileobj:
            self._fileobj.close()
    finally:
        super(ZipExtFile, self).close()
Tim Petersea5962f2007-03-12 18:07:52 +0000726
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000727
R David Murray3f4ccba2012-04-12 18:42:47 -0400728class ZipFile(object):
Tim Petersa19a1682001-03-29 04:36:09 +0000729 """ Class with methods to open, read, write, close, list zip files.
730
Martin v. Löwis8c436412008-07-03 12:51:14 +0000731 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000732
Fred Drake3d9091e2001-03-26 15:49:24 +0000733 file: Either the path to the file, or a file-like object.
734 If it is a path, the file will be opened and closed by ZipFile.
735 mode: The mode can be either read "r", write "w" or append "a".
736 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000737 allowZip64: if True ZipFile will create files with ZIP64 extensions when
738 needed, otherwise it will raise an exception when this would
739 be necessary.
740
Fred Drake3d9091e2001-03-26 15:49:24 +0000741 """
Fred Drake484d7352000-10-02 21:14:52 +0000742
Fred Drake90eac282001-02-28 05:29:34 +0000743 fp = None # Set here since __del__ checks it
744
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file is either a path, which is opened here, or a file-like object,
    which is used as given.  Raises RuntimeError for an unknown mode, an
    unsupported compression method, or ZIP_DEFLATED without zlib.  If
    reading the archive contents fails, the file is released through
    self._fpclose() and the exception propagates.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression != ZIP_STORED:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                  # Level of printing: 0 through 3
    self.NameToInfo = {}            # Find file info given name
    self.filelist = []              # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = mode
    self.pwd = None
    self._comment = ''

    # Check if we were passed a file-like object
    if isinstance(file, basestring):
        self._filePassed = 0
        self.filename = file
        # Maps the archive mode to the first file mode to try, and each
        # file mode to the one to fall back to when opening fails.
        fallbacks = {'r': 'rb', 'w': 'w+b', 'a': 'r+b',
                     'r+b': 'w+b', 'w+b': 'wb'}
        filemode = fallbacks[mode]
        while True:
            try:
                self.fp = io.open(file, filemode)
            except IOError:
                if filemode not in fallbacks:
                    raise
                filemode = fallbacks[filemode]
            else:
                break
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    self._fileRefCnt = 1

    try:
        if mode == 'r':
            self._RealGetContents()
        elif mode == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
            self.start_dir = 0
        elif mode == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipfile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                self.start_dir = self.fp.tell()
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')
    except:
        # Release the file before re-raising, whatever went wrong.
        failed_fp = self.fp
        self.fp = None
        self._fpclose(failed_fp)
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000820
def __enter__(self):
    """Enter a ``with`` block; the ZipFile itself is the context value."""
    return self
823
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    The exception details are not inspected, and nothing is returned, so
    an exception raised inside the block keeps propagating.
    """
    self.close()
826
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Finds the end-of-central-directory record, then turns every central
    directory entry into a ZipInfo that is appended to self.filelist and
    indexed in self.NameToInfo.  Also sets self._comment and
    self.start_dir.  Raises BadZipfile when the end record is missing or
    a directory entry is truncated or has the wrong signature.
    """
    archive = self.fp
    try:
        endrec = _EndRecData(archive)
    except IOError:
        raise BadZipfile("File is not a zip file")
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset %s %s %s"
              % (offset_cd, inferred, concat))
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    archive.seek(self.start_dir, 0)
    # Parse the directory from an in-memory copy of its bytes.
    directory = cStringIO.StringIO(archive.read(size_cd))
    total = 0
    while total < size_cd:
        raw = directory.read(sizeCentralDir)
        if len(raw) != sizeCentralDir:
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, raw)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        name_len = centdir[_CD_FILENAME_LENGTH]
        extra_len = centdir[_CD_EXTRA_FIELD_LENGTH]
        comment_len = centdir[_CD_COMMENT_LENGTH]
        filename = directory.read(name_len)
        # Create ZipInfo instance to store file information
        info = ZipInfo(filename)
        info.extra = directory.read(extra_len)
        info.comment = directory.read(comment_len)
        info.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (info.create_version, info.create_system, info.extract_version,
         info.reserved, info.flag_bits, info.compress_type, t, d,
         info.CRC, info.compress_size, info.file_size) = centdir[1:12]
        info.volume, info.internal_attr, info.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        info._raw_time = t
        info.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                          t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        info._decodeExtra()
        info.header_offset = info.header_offset + concat
        info.filename = info._decodeFilename()
        self.filelist.append(info)
        self.NameToInfo[info.filename] = info

        # update total bytes read from central directory
        total = total + sizeCentralDir + name_len + extra_len + comment_len

        if self.debug > 2:
            print("total %s" % total)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000894
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000895
def namelist(self):
    """Return a list of file names in the archive.

    The result is a new list, in the same order as self.filelist.
    """
    return [info.filename for info in self.filelist]
902
def infolist(self):
    """Return a list of class ZipInfo instances for files in the
    archive.

    This is the archive's own self.filelist object, not a copy.
    """
    return self.filelist
907
def printdir(self):
    """Print a table of contents for the zip file.

    Writes a header line followed by one line per member giving its
    name, modification time and uncompressed size.
    """
    print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"))
    row = "%-46s %s %12d"
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        print(row % (zinfo.filename, stamp, zinfo.file_size))
914
def testzip(self):
    """Read all the files and check the CRC.

    Returns the name of the first member for which reading raises
    BadZipfile, or None when every member reads cleanly.
    """
    chunk_size = 2 ** 20
    for zinfo in self.filelist:
        try:
            # Read by chunks, to avoid an OverflowError or a
            # MemoryError with very large embedded files.
            with self.open(zinfo.filename, "r") as member:
                chunk = member.read(chunk_size)
                while chunk:     # Check CRC-32
                    chunk = member.read(chunk_size)
        except BadZipfile:
            return zinfo.filename
    return None
927
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when self.NameToInfo has no entry for name.
    """
    info = self.NameToInfo.get(name)
    if info is not None:
        return info
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000936
def setpassword(self, pwd):
    """Set default password for encrypted files.

    The value is stored in self.pwd; open() uses it when it is called
    without a password of its own.
    """
    self.pwd = pwd
940
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    # Store a new archive comment and flag the archive as modified.
    # A comment longer than ZIP_MAX_COMMENT is truncated to that length
    # after a warning is issued.
    # check for valid comment length
    if len(comment) > ZIP_MAX_COMMENT:
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT, stacklevel=2)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    self._didModify = True
956
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    name and pwd are passed straight to open().  The member stream is
    closed before returning, so its hold on the archive's file object is
    released immediately instead of waiting for garbage collection.
    """
    with self.open(name, "r", pwd) as member:
        return member.read()
960
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name is a member name or a ZipInfo instance.  mode must be "r", "U"
    or "rU".  pwd is the password for an encrypted member; when it is
    empty, self.pwd is used instead.

    Raises RuntimeError for a bad mode, a closed archive, or a missing
    or wrong password, and BadZipfile when the member's local header is
    truncated, has a bad signature, or names a different file.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Make sure we have an info object: 'name' may already be one.
    zinfo = name if isinstance(name, ZipInfo) else self.getinfo(name)

    self._fileRefCnt += 1
    zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose)
    try:
        # Skip the file header:
        raw_header = zef_file.read(sizeFileHeader)
        if len(raw_header) != sizeFileHeader:
            raise BadZipfile("Truncated file header")
        fheader = struct.unpack(structFileHeader, raw_header)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        extra_len = fheader[_FH_EXTRA_FIELD_LENGTH]
        if extra_len:
            zef_file.read(extra_len)

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        zd = None
        if zinfo.flag_bits & 0x1:
            pwd = pwd or self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            enc_header = zef_file.read(12)
            h = map(zd, enc_header[0:12])
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd, True)
    except:
        # Give back the shared-file reference before re-raising.
        zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001028
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
    using its full name.

    member is a filename or a ZipInfo object.  path selects a different
    directory to extract into; pwd is handed on for encrypted members.
    Returns whatever _extract_member() returns.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    target_dir = os.getcwd() if path is None else path
    return self._extract_member(zinfo, target_dir, pwd)
1042
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
    directory.

    path selects a different directory to extract to.  members is
    optional; when omitted, every name from namelist() is extracted.
    Each member is handed to extract() together with path and pwd.
    """
    selected = self.namelist() if members is None else members
    for member in selected:
        self.extract(member, path, pwd)
1054
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
    file on the path targetpath.

    The member name is made relative first: a drive/UNC prefix, empty
    components, "." and ".." are dropped.  Missing parent directories
    are created.  A member whose name ends in '/' is created as a
    directory.  Returns the normalised path that was written.
    """
    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    arcname = os.path.sep.join(
        x for x in arcname.split(os.path.sep)
        if x not in ('', os.path.curdir, os.path.pardir))
    if os.path.sep == '\\':
        # filter illegal characters on Windows
        illegal = ':<>|"?*'
        if isinstance(arcname, unicode):
            table = {ord(c): ord('_') for c in illegal}
        else:
            table = string.maketrans(illegal, '_' * len(illegal))
        arcname = arcname.translate(table)
        # remove trailing dots
        arcname = (x.rstrip('.') for x in arcname.split(os.path.sep))
        arcname = os.path.sep.join(x for x in arcname if x)

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    if member.filename[-1] == '/':
        # Directory entry: there is no data to copy.
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    # open() rather than the file() constructor, matching write().
    with self.open(member, pwd=pwd) as source, \
            open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1100
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Warns when the member name is already present.  Raises RuntimeError
    when the archive is not writable, is closed, or the compression
    method is unusable, and LargeZipFile when ZIP64 extensions would be
    needed but self._allowZip64 is false.
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    if zinfo.compress_type == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError("That compression method is not supported")
    if self._allowZip64:
        return
    # Without ZIP64, name the first limit this entry would exceed.
    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        reason = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        reason = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        reason = "Zipfile size"
    else:
        return
    raise LargeZipFile(reason + " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001128
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    filename is the path of the file or directory to add.  arcname is
    the name to store it under (defaults to filename); any drive prefix
    and leading separators are removed.  compress_type overrides the
    archive's default compression for this member.

    Raises RuntimeError when the archive is closed, plus whatever
    _writecheck() raises for this entry.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    # NOTE(review): if stripping leaves arcname empty (e.g. a path that
    # is only separators), arcname[0] looks like it raises IndexError --
    # confirm whether that case needs handling.
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    # New entries go where the central directory currently starts.
    self.fp.seek(self.start_dir, 0)
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # Directories are stored as a header only, with no data.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        zinfo.external_attr |= 0x10  # MS-DOS directory flag
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        self.start_dir = self.fp.tell()
        return

    with open(filename, "rb") as fp:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
            zinfo.file_size * 1.05 > ZIP64_LIMIT
        self.fp.write(zinfo.FileHeader(zip64))
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
        else:
            cmpr = None
        file_size = 0
        # Copy in 8 KiB chunks, updating the CRC and sizes as we go.
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    if cmpr:
        # Emit whatever the compressor is still holding back.
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # The header was written without ZIP64 fields, so the final sizes
    # must still fit below ZIP64_LIMIT.
    if not zip64 and self._allowZip64:
        if file_size > ZIP64_LIMIT:
            raise RuntimeError('File size has increased during compressing')
        if compress_size > ZIP64_LIMIT:
            raise RuntimeError('Compressed size larger than uncompressed size')
    # Seek backwards and write file header (which will now include
    # correct CRC and file sizes)
    self.start_dir = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset, 0)
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.seek(self.start_dir, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1220
def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo is built with the current local
    time and the archive's default compression.  compress_type, when not
    None, overrides the entry's compression method.  Raises RuntimeError
    when the archive is closed and LargeZipFile when the data would need
    ZIP64 extensions that are not allowed.
    """
    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])

        zinfo.compress_type = self.compression
        if zinfo.filename[-1] == '/':
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    if compress_type is not None:
        zinfo.compress_type = compress_type

    zinfo.file_size = len(bytes)            # Uncompressed size
    self.fp.seek(self.start_dir, 0)
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(bytes) & 0xffffffff   # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                      zlib.DEFLATED, -15)
        bytes = compressor.compress(bytes) + compressor.flush()
        zinfo.compress_size = len(bytes)    # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    # ZIP64 is needed as soon as either size exceeds the limit.
    zip64 = max(zinfo.file_size, zinfo.compress_size) > ZIP64_LIMIT
    if zip64 and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.write(bytes)
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        if zip64:
            fmt = '<LQQ'
        else:
            fmt = '<LLL'
        self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    self.fp.flush()
    self.start_dir = self.fp.tell()
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1273
def __del__(self):
    """Call the "close()" method in case the user forgot."""
    # close() returns immediately when self.fp is None, so this is a
    # no-op for an archive that was already closed.
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001277
1278 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001279 """Close the file, and for mode "w" and "a" write the ending
1280 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001281 if self.fp is None:
1282 return
Tim Petersa608bb22006-06-15 18:06:29 +00001283
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001284 try:
1285 if self.mode in ("w", "a") and self._didModify: # write ending records
Serhiy Storchaka45aa7712014-12-03 09:11:12 +02001286 self.fp.seek(self.start_dir, 0)
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001287 for zinfo in self.filelist: # write central directory
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001288 dt = zinfo.date_time
1289 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1290 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1291 extra = []
1292 if zinfo.file_size > ZIP64_LIMIT \
1293 or zinfo.compress_size > ZIP64_LIMIT:
1294 extra.append(zinfo.file_size)
1295 extra.append(zinfo.compress_size)
1296 file_size = 0xffffffff
1297 compress_size = 0xffffffff
1298 else:
1299 file_size = zinfo.file_size
1300 compress_size = zinfo.compress_size
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001301
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001302 if zinfo.header_offset > ZIP64_LIMIT:
1303 extra.append(zinfo.header_offset)
1304 header_offset = 0xffffffffL
1305 else:
1306 header_offset = zinfo.header_offset
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001307
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001308 extra_data = zinfo.extra
1309 if extra:
1310 # Append a ZIP64 field to the extra's
1311 extra_data = struct.pack(
1312 '<HH' + 'Q'*len(extra),
1313 1, 8*len(extra), *extra) + extra_data
Tim Petersa608bb22006-06-15 18:06:29 +00001314
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001315 extract_version = max(45, zinfo.extract_version)
1316 create_version = max(45, zinfo.create_version)
1317 else:
1318 extract_version = zinfo.extract_version
1319 create_version = zinfo.create_version
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001320
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001321 try:
1322 filename, flag_bits = zinfo._encodeFilenameFlags()
1323 centdir = struct.pack(structCentralDir,
1324 stringCentralDir, create_version,
1325 zinfo.create_system, extract_version, zinfo.reserved,
1326 flag_bits, zinfo.compress_type, dostime, dosdate,
1327 zinfo.CRC, compress_size, file_size,
1328 len(filename), len(extra_data), len(zinfo.comment),
1329 0, zinfo.internal_attr, zinfo.external_attr,
1330 header_offset)
1331 except DeprecationWarning:
1332 print >>sys.stderr, (structCentralDir,
1333 stringCentralDir, create_version,
1334 zinfo.create_system, extract_version, zinfo.reserved,
1335 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1336 zinfo.CRC, compress_size, file_size,
1337 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1338 0, zinfo.internal_attr, zinfo.external_attr,
1339 header_offset)
1340 raise
1341 self.fp.write(centdir)
1342 self.fp.write(filename)
1343 self.fp.write(extra_data)
1344 self.fp.write(zinfo.comment)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001345
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001346 pos2 = self.fp.tell()
1347 # Write end-of-zip-archive record
Serhiy Storchaka45efb222014-09-23 21:33:52 +03001348 centDirCount = len(self.filelist)
Serhiy Storchaka45aa7712014-12-03 09:11:12 +02001349 centDirSize = pos2 - self.start_dir
1350 centDirOffset = self.start_dir
Serhiy Storchaka45efb222014-09-23 21:33:52 +03001351 requires_zip64 = None
1352 if centDirCount > ZIP_FILECOUNT_LIMIT:
1353 requires_zip64 = "Files count"
1354 elif centDirOffset > ZIP64_LIMIT:
1355 requires_zip64 = "Central directory offset"
1356 elif centDirSize > ZIP64_LIMIT:
1357 requires_zip64 = "Central directory size"
1358 if requires_zip64:
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001359 # Need to write the ZIP64 end-of-archive records
Serhiy Storchaka45efb222014-09-23 21:33:52 +03001360 if not self._allowZip64:
1361 raise LargeZipFile(requires_zip64 +
1362 " would require ZIP64 extensions")
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001363 zip64endrec = struct.pack(
1364 structEndArchive64, stringEndArchive64,
1365 44, 45, 45, 0, 0, centDirCount, centDirCount,
1366 centDirSize, centDirOffset)
1367 self.fp.write(zip64endrec)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001368
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001369 zip64locrec = struct.pack(
1370 structEndArchive64Locator,
1371 stringEndArchive64Locator, 0, pos2, 1)
1372 self.fp.write(zip64locrec)
1373 centDirCount = min(centDirCount, 0xFFFF)
1374 centDirSize = min(centDirSize, 0xFFFFFFFF)
1375 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001376
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001377 endrec = struct.pack(structEndArchive, stringEndArchive,
1378 0, 0, centDirCount, centDirCount,
1379 centDirSize, centDirOffset, len(self._comment))
1380 self.fp.write(endrec)
1381 self.fp.write(self._comment)
1382 self.fp.flush()
1383 finally:
1384 fp = self.fp
1385 self.fp = None
Serhiy Storchaka45aa7712014-12-03 09:11:12 +02001386 self._fpclose(fp)
1387
1388 def _fpclose(self, fp):
1389 assert self._fileRefCnt > 0
1390 self._fileRefCnt -= 1
1391 if not self._fileRefCnt and not self._filePassed:
1392 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001393
1394
1395class PyZipFile(ZipFile):
Fred Drake484d7352000-10-02 21:14:52 +00001396 """Class to create ZIP archives with Python library files and packages."""
1397
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001398 def writepy(self, pathname, basename = ""):
1399 """Add all files from "pathname" to the ZIP archive.
1400
Fred Drake484d7352000-10-02 21:14:52 +00001401 If pathname is a package directory, search the directory and
1402 all package subdirectories recursively for all *.py and enter
1403 the modules into the archive. If pathname is a plain
1404 directory, listdir *.py and enter all modules. Else, pathname
1405 must be a Python *.py file and the module will be put into the
1406 archive. Added modules are always module.pyo or module.pyc.
1407 This method will compile the module.py into module.pyc if
1408 necessary.
1409 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001410 dir, name = os.path.split(pathname)
1411 if os.path.isdir(pathname):
1412 initname = os.path.join(pathname, "__init__.py")
1413 if os.path.isfile(initname):
1414 # This is a package directory, add it
1415 if basename:
1416 basename = "%s/%s" % (basename, name)
1417 else:
1418 basename = name
1419 if self.debug:
1420 print "Adding package in", pathname, "as", basename
1421 fname, arcname = self._get_codename(initname[0:-3], basename)
1422 if self.debug:
1423 print "Adding", arcname
1424 self.write(fname, arcname)
1425 dirlist = os.listdir(pathname)
1426 dirlist.remove("__init__.py")
1427 # Add all *.py files and package subdirectories
1428 for filename in dirlist:
1429 path = os.path.join(pathname, filename)
1430 root, ext = os.path.splitext(filename)
1431 if os.path.isdir(path):
1432 if os.path.isfile(os.path.join(path, "__init__.py")):
1433 # This is a package directory, add it
1434 self.writepy(path, basename) # Recursive call
1435 elif ext == ".py":
1436 fname, arcname = self._get_codename(path[0:-3],
1437 basename)
1438 if self.debug:
1439 print "Adding", arcname
1440 self.write(fname, arcname)
1441 else:
1442 # This is NOT a package directory, add its files at top level
1443 if self.debug:
1444 print "Adding files from directory", pathname
1445 for filename in os.listdir(pathname):
1446 path = os.path.join(pathname, filename)
1447 root, ext = os.path.splitext(filename)
1448 if ext == ".py":
1449 fname, arcname = self._get_codename(path[0:-3],
1450 basename)
1451 if self.debug:
1452 print "Adding", arcname
1453 self.write(fname, arcname)
1454 else:
1455 if pathname[-3:] != ".py":
1456 raise RuntimeError, \
Fred Drake5db246d2000-09-29 20:44:48 +00001457 'Files added with writepy() must end with ".py"'
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001458 fname, arcname = self._get_codename(pathname[0:-3], basename)
1459 if self.debug:
1460 print "Adding file", arcname
1461 self.write(fname, arcname)
1462
1463 def _get_codename(self, pathname, basename):
1464 """Return (filename, archivename) for the path.
1465
Fred Drake484d7352000-10-02 21:14:52 +00001466 Given a module name path, return the correct file path and
1467 archive name, compiling if necessary. For example, given
1468 /python/lib/string, return (/python/lib/string.pyc, string).
1469 """
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001470 file_py = pathname + ".py"
1471 file_pyc = pathname + ".pyc"
1472 file_pyo = pathname + ".pyo"
1473 if os.path.isfile(file_pyo) and \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001474 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
Tim Peterse1190062001-01-15 03:34:38 +00001475 fname = file_pyo # Use .pyo file
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001476 elif not os.path.isfile(file_pyc) or \
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001477 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
Fred Drake484d7352000-10-02 21:14:52 +00001478 import py_compile
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001479 if self.debug:
1480 print "Compiling", file_py
Martin v. Löwis0c6774d2003-01-15 11:51:06 +00001481 try:
1482 py_compile.compile(file_py, file_pyc, None, True)
1483 except py_compile.PyCompileError,err:
1484 print err.msg
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001485 fname = file_pyc
1486 else:
1487 fname = file_pyc
1488 archivename = os.path.split(fname)[1]
1489 if basename:
1490 archivename = "%s/%s" % (basename, archivename)
1491 return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001492
1493
def main(args = None):
    """Command-line entry point: list, test, extract or create an archive.

    "args" is the argument list without the program name; when it is
    None, sys.argv[1:] is used.  The first argument selects the action
    (-l, -t, -e or -c).  On an unknown action or a wrong number of
    arguments the usage text is printed and the process exits with
    status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)

    def usage_exit():
        # Shared failure path for every malformed command line.
        print(USAGE)
        sys.exit(1)

    if args is None:
        args = sys.argv[1:]

    option = args[0] if args else None
    if option not in ('-l', '-c', '-e', '-t'):
        usage_exit()

    if option == '-l':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif option == '-t':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif option == '-e':
        if len(args) != 3:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif option == '-c':
        if len(args) < 3:
            usage_exit()

        def add_tree(archive, fs_path, arc_path):
            # Regular files are stored deflated; directories are walked
            # recursively; anything else is ignored.
            if os.path.isfile(fs_path):
                archive.write(fs_path, arc_path, ZIP_DEFLATED)
                return
            if not os.path.isdir(fs_path):
                return
            if arc_path:
                archive.write(fs_path, arc_path)
            for entry in os.listdir(fs_path):
                add_tree(archive,
                         os.path.join(fs_path, entry),
                         os.path.join(arc_path, entry))

        with ZipFile(args[1], 'w', allowZip64=True) as zf:
            for path in args[2:]:
                # A source ending with a separator has an empty basename;
                # fall back to the name of its directory part.
                zippath = (os.path.basename(path) or
                           os.path.basename(os.path.dirname(path)))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                add_tree(zf, path, zippath)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001559
# Run the command-line interface when the module is executed as a script.
if __name__ == "__main__":
    main()