blob: 6722c409978615954af12e89e46a9a181a9e8ef0 [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00005import binascii, cStringIO, stat
Antoine Pitrou94c33eb2010-01-27 20:59:50 +00006import io
7import re
Serhiy Storchaka7c068752013-02-02 12:30:49 +02008import string
Guido van Rossum32abe6f2000-03-31 17:30:02 +00009
10try:
Tim Peterse1190062001-01-15 03:34:38 +000011 import zlib # We may need its compression method
Gregory P. Smithb89a0962008-03-19 01:46:10 +000012 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000013except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000014 zlib = None
Gregory P. Smithb89a0962008-03-19 01:46:10 +000015 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000016
# Public names exported by this module.
__all__ = [
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
    "LargeZipFile",
]
Skip Montanaro40fc1602001-03-01 04:27:19 +000019
class BadZipfile(Exception):
    """Raised for archives this module cannot process.

    Within this module it signals, for example, an archive that spans
    multiple disks or a CRC-32 mismatch on an archive member.
    """
Ronald Oussoren143cefb2006-06-15 08:14:18 +000022
23
class LargeZipFile(Exception):
    """Signal that ZIP64 extensions are needed but not enabled.

    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """
29
# Alias: "error" is the exception raised by this module.
error = BadZipfile
Guido van Rossum32abe6f2000-03-31 17:30:02 +000031
# Numeric limits of the classic ZIP format.  Sizes above ZIP64_LIMIT make
# ZipInfo.FileHeader() fall back to the ZIP64 extension.
ZIP64_LIMIT = 2 ** 31 - 1
ZIP_FILECOUNT_LIMIT = 2 ** 16 - 1
ZIP_MAX_COMMENT = 2 ** 16 - 1
Ronald Oussoren143cefb2006-06-15 08:14:18 +000035
# Constants for ZIP file compression methods.  Other ZIP compression
# methods are not supported.
ZIP_STORED, ZIP_DEFLATED = 0, 8
40
# Below are some formats and associated data for reading/writing headers
# using the struct module.  The names and structures of headers/records are
# those used in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields in the unpacked "end of central directory" record.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE) = range(8)

# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9
65
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)
92
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields in the unpacked local file header.
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
111
# The "Zip64 end of central directory locator" structure, magic number,
# and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000116
# The "Zip64 end of central directory" record, magic number, size, and
# indices (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields in the unpacked ZIP64 end-of-directory record.
(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
133
def _check_zipfile(fp):
    """Return True if the open file fp has a ZIP end-of-archive record.

    An IOError raised while looking for the record is treated as
    "not a ZIP file" and yields False.
    """
    try:
        # A record was found: the file has the correct magic number.
        found = bool(_EndRecData(fp))
    except IOError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000141
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path, or a file or file-like object
    (anything with a "read" attribute).  Returns False when the file
    cannot be opened or read.
    """
    try:
        if hasattr(filename, "read"):
            # Already an open file-like object: inspect it directly.
            return _check_zipfile(fp=filename)
        with open(filename, "rb") as fp:
            return _check_zipfile(fp)
    except IOError:
        return False
157
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin is the open archive, offset is the position of the classic "end of
    central directory" record relative to the end of the file, and endrec is
    the list built from that record.  endrec is returned unchanged when the
    ZIP64 locator or record is missing or too short; otherwise it is updated
    in place and returned.  Raises BadZipfile for multi-disk archives.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, reloff, disks = struct.unpack(structEndArchive64Locator,
                                                 locator)
    if magic != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks == 1):
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    fields = struct.unpack(structEndArchive64, record)
    if fields[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    for ecd_index, cd64_index in (
            (_ECD_SIGNATURE, _CD64_SIGNATURE),
            (_ECD_DISK_NUMBER, _CD64_DISK_NUMBER),
            (_ECD_DISK_START, _CD64_DISK_NUMBER_START),
            (_ECD_ENTRIES_THIS_DISK, _CD64_NUMBER_ENTRIES_THIS_DISK),
            (_ECD_ENTRIES_TOTAL, _CD64_NUMBER_ENTRIES_TOTAL),
            (_ECD_SIZE, _CD64_DIRECTORY_SIZE),
            (_ECD_OFFSET, _CD64_OFFSET_START_CENTDIR)):
        endrec[ecd_index] = fields[cd64_index]
    return endrec
199
200
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list holding the unpacked fields of the ZIP "End of
    central dir" record, followed by the archive comment and then the file
    seek offset of the record.  None is returned when no such record can be
    located in fpin.
    """
    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    tail = fpin.read()
    has_plain_record = (len(tail) == sizeEndCentDir and
                        tail[0:4] == stringEndArchive and
                        tail[-2:] == b"\000\000")
    if has_plain_record:
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, tail))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may
    # be up to 64K long.  It is assumed that the "end of central directory"
    # magic number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    record_end = start + sizeEndCentDir
    recData = data[start:record_end]
    if len(recData) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, recData))
    commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    endrec.append(data[record_end:record_end + commentSize])
    endrec.append(maxCommentStart + start)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, maxCommentStart + start - filesize,
                         endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000260
Fred Drake484d7352000-10-02 21:14:52 +0000261
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Initialise the entry with a name, a timestamp and defaults.

        filename is stored unchanged in orig_filename; the normalised form
        (cut at the first null byte, os.sep replaced by "/") is stored in
        filename.  date_time is a (year, month, day, hour, min, sec)
        sequence.  Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self, zip64=None):
        """Return the per-file header as a string.

        zip64 selects whether a ZIP64 extra field is appended: None decides
        from the sizes, a true value always appends it, and a false value
        forbids it.  Raises LargeZipFile when a size exceeds ZIP64_LIMIT
        while zip64 is false.  When the ZIP64 fallback is used,
        extract_version and create_version are each raised to at least 45.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version on its own value; deriving it from
            # extract_version would discard a higher create_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        A unicode filename is encoded as ASCII when possible; otherwise it
        is encoded as UTF-8 and bit 0x800 is set in the returned flags.
        Any other filename is returned unchanged.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply ZIP64 values from the extra field to this entry.

        Walks the (type, length, data) records in self.extra.  For a record
        of type 1, file_size, compress_size and header_offset are replaced,
        in that order, by the 64-bit values it carries, but only for those
        attributes that currently hold the "see extra field" marker value.
        Raises RuntimeError if such a record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            # Skip this record's 4-byte header and its payload.
            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000418
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000419
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for byte in range(256):
            value = byte
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= poly
            table.append(value)
        return table
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        slot = (crc ^ ord(ch)) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[slot]

    def __init__(self, pwd):
        """Set the three keys to their start values and mix in pwd."""
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for char in pwd:
            self._UpdateKeys(char)

    def _UpdateKeys(self, c):
        """Advance the three keys using the single character c."""
        self.key0 = self._crc32(c, self.key0)
        mixed = (self.key1 + (self.key0 & 0xFF)) & 0xFFFFFFFF
        self.key1 = (mixed * 0x08088405 + 1) & 0xFFFFFFFF
        top_byte = (self.key1 >> 24) & 0xFF
        self.key2 = self._crc32(chr(top_byte), self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        k = self.key2 | 2
        plain = chr(ord(c) ^ (((k * (k ^ 1)) >> 8) & 255))
        # The keys are advanced with the decrypted character.
        self._UpdateKeys(plain)
        return plain
478
Ezio Melotti9e949722012-11-18 13:18:06 +0200479
# Descriptive names for ZIP compression method numbers; used to build the
# error message for compression types that cannot be read.
compressor_names = {
    0:  'store',
    1:  'shrink',
    2:  'reduce',
    3:  'reduce',
    4:  'reduce',
    5:  'reduce',
    6:  'implode',
    7:  'tokenize',
    8:  'deflate',
    9:  'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
499
500
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000501class ZipExtFile(io.BufferedIOBase):
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000502 """File-like object for reading an archive member.
Tim Petersea5962f2007-03-12 18:07:52 +0000503 Is returned by ZipFile.open().
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000504 """
Tim Petersea5962f2007-03-12 18:07:52 +0000505
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000506 # Max size supported by decompressor.
507 MAX_N = 1 << 31 - 1
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000508
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000509 # Read from compressed files in 4k blocks.
510 MIN_READ_SIZE = 4096
Tim Petersea5962f2007-03-12 18:07:52 +0000511
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000512 # Search for universal newlines or line chunks.
513 PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
514
Jesus Cea93d628b2012-11-04 02:32:08 +0100515 def __init__(self, fileobj, mode, zipinfo, decrypter=None,
516 close_fileobj=False):
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000517 self._fileobj = fileobj
518 self._decrypter = decrypter
Jesus Cea93d628b2012-11-04 02:32:08 +0100519 self._close_fileobj = close_fileobj
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000520
Ezio Melotti4611b052010-01-28 01:41:30 +0000521 self._compress_type = zipinfo.compress_type
522 self._compress_size = zipinfo.compress_size
523 self._compress_left = zipinfo.compress_size
524
525 if self._compress_type == ZIP_DEFLATED:
526 self._decompressor = zlib.decompressobj(-15)
Ezio Melotti9e949722012-11-18 13:18:06 +0200527 elif self._compress_type != ZIP_STORED:
528 descr = compressor_names.get(self._compress_type)
529 if descr:
530 raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr))
531 else:
532 raise NotImplementedError("compression type %d" % (self._compress_type,))
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000533 self._unconsumed = ''
534
535 self._readbuffer = ''
536 self._offset = 0
537
538 self._universal = 'U' in mode
539 self.newlines = None
540
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000541 # Adjust read size for encrypted files since the first 12 bytes
542 # are for the encryption/password information.
543 if self._decrypter is not None:
544 self._compress_left -= 12
545
546 self.mode = mode
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000547 self.name = zipinfo.filename
548
Antoine Pitroue1436d12010-08-12 15:25:51 +0000549 if hasattr(zipinfo, 'CRC'):
550 self._expected_crc = zipinfo.CRC
551 self._running_crc = crc32(b'') & 0xffffffff
552 else:
553 self._expected_crc = None
554
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000555 def readline(self, limit=-1):
556 """Read and return a line from the stream.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000557
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000558 If limit is specified, at most limit bytes will be read.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000559 """
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000560
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000561 if not self._universal and limit < 0:
562 # Shortcut common case - newline found in buffer.
563 i = self._readbuffer.find('\n', self._offset) + 1
564 if i > 0:
565 line = self._readbuffer[self._offset: i]
566 self._offset = i
567 return line
Tim Petersea5962f2007-03-12 18:07:52 +0000568
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000569 if not self._universal:
570 return io.BufferedIOBase.readline(self, limit)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000571
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000572 line = ''
573 while limit < 0 or len(line) < limit:
574 readahead = self.peek(2)
575 if readahead == '':
576 return line
Tim Petersea5962f2007-03-12 18:07:52 +0000577
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000578 #
579 # Search for universal newlines or line chunks.
580 #
581 # The pattern returns either a line chunk or a newline, but not
582 # both. Combined with peek(2), we are assured that the sequence
583 # '\r\n' is always retrieved completely and never split into
584 # separate newlines - '\r', '\n' due to coincidental readaheads.
585 #
586 match = self.PATTERN.search(readahead)
587 newline = match.group('newline')
588 if newline is not None:
589 if self.newlines is None:
590 self.newlines = []
591 if newline not in self.newlines:
592 self.newlines.append(newline)
593 self._offset += len(newline)
594 return line + '\n'
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000595
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000596 chunk = match.group('chunk')
597 if limit >= 0:
598 chunk = chunk[: limit - len(line)]
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000599
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000600 self._offset += len(chunk)
601 line += chunk
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000602
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000603 return line
604
605 def peek(self, n=1):
606 """Returns buffered bytes without advancing the position."""
607 if n > len(self._readbuffer) - self._offset:
608 chunk = self.read(n)
Serhiy Storchakad1051962013-12-21 23:51:15 +0200609 if len(chunk) > self._offset:
610 self._readbuffer = chunk + self._readbuffer[self._offset:]
611 self._offset = 0
612 else:
613 self._offset -= len(chunk)
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000614
615 # Return up to 512 bytes to reduce allocation overhead for tight loops.
616 return self._readbuffer[self._offset: self._offset + 512]
617
618 def readable(self):
619 return True
620
621 def read(self, n=-1):
622 """Read and return up to n bytes.
623 If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000624 """
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000625 buf = ''
Antoine Pitroue4195e82010-09-12 14:56:27 +0000626 if n is None:
627 n = -1
628 while True:
629 if n < 0:
630 data = self.read1(n)
631 elif n > len(buf):
632 data = self.read1(n - len(buf))
633 else:
634 return buf
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000635 if len(data) == 0:
636 return buf
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000637 buf += data
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000638
Antoine Pitroue1436d12010-08-12 15:25:51 +0000639 def _update_crc(self, newdata, eof):
640 # Update the CRC using the given data.
641 if self._expected_crc is None:
642 # No need to compute the CRC if we don't have a reference value
643 return
644 self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
645 # Check the CRC if we're at the end of the file
646 if eof and self._running_crc != self._expected_crc:
647 raise BadZipfile("Bad CRC-32 for file %r" % self.name)
648
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000649 def read1(self, n):
650 """Read up to n bytes with at most one read() system call."""
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000651
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000652 # Simplify algorithm (branching) by transforming negative n to large n.
653 if n < 0 or n is None:
654 n = self.MAX_N
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000655
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000656 # Bytes available in read buffer.
657 len_readbuffer = len(self._readbuffer) - self._offset
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000658
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000659 # Read from file.
660 if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
661 nbytes = n - len_readbuffer - len(self._unconsumed)
662 nbytes = max(nbytes, self.MIN_READ_SIZE)
663 nbytes = min(nbytes, self._compress_left)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000664
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000665 data = self._fileobj.read(nbytes)
666 self._compress_left -= len(data)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000667
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000668 if data and self._decrypter is not None:
669 data = ''.join(map(self._decrypter, data))
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000670
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000671 if self._compress_type == ZIP_STORED:
Antoine Pitroue1436d12010-08-12 15:25:51 +0000672 self._update_crc(data, eof=(self._compress_left==0))
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000673 self._readbuffer = self._readbuffer[self._offset:] + data
674 self._offset = 0
675 else:
676 # Prepare deflated bytes for decompression.
677 self._unconsumed += data
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000678
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000679 # Handle unconsumed data.
Ezio Melotti4611b052010-01-28 01:41:30 +0000680 if (len(self._unconsumed) > 0 and n > len_readbuffer and
681 self._compress_type == ZIP_DEFLATED):
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000682 data = self._decompressor.decompress(
683 self._unconsumed,
684 max(n - len_readbuffer, self.MIN_READ_SIZE)
685 )
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000686
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000687 self._unconsumed = self._decompressor.unconsumed_tail
Antoine Pitroue1436d12010-08-12 15:25:51 +0000688 eof = len(self._unconsumed) == 0 and self._compress_left == 0
689 if eof:
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000690 data += self._decompressor.flush()
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000691
Antoine Pitroue1436d12010-08-12 15:25:51 +0000692 self._update_crc(data, eof=eof)
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000693 self._readbuffer = self._readbuffer[self._offset:] + data
694 self._offset = 0
695
696 # Read from buffer.
697 data = self._readbuffer[self._offset: self._offset + n]
698 self._offset += len(data)
699 return data
700
def close(self):
    """Close this member stream.

    The underlying file object is closed first, but only when
    self._close_fileobj is set.  The base-class close() runs in every
    case, even if closing the underlying file raises.
    """
    try:
        if not self._close_fileobj:
            # The file object belongs to somebody else; leave it open.
            return
        self._fileobj.close()
    finally:
        super(ZipExtFile, self).close()
Tim Petersea5962f2007-03-12 18:07:52 +0000707
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000708
R David Murray3f4ccba2012-04-12 18:42:47 -0400709class ZipFile(object):
Tim Petersa19a1682001-03-29 04:36:09 +0000710 """ Class with methods to open, read, write, close, list zip files.
711
Martin v. Löwis8c436412008-07-03 12:51:14 +0000712 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000713
Fred Drake3d9091e2001-03-26 15:49:24 +0000714 file: Either the path to the file, or a file-like object.
715 If it is a path, the file will be opened and closed by ZipFile.
716 mode: The mode can be either read "r", write "w" or append "a".
717 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000718 allowZip64: if True ZipFile will create files with ZIP64 extensions when
719 needed, otherwise it will raise an exception when this would
720 be necessary.
721
Fred Drake3d9091e2001-03-26 15:49:24 +0000722 """
Fred Drake484d7352000-10-02 21:14:52 +0000723
Fred Drake90eac282001-02-28 05:29:34 +0000724 fp = None # Set here since __del__ checks it
725
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file: a path (the file is opened here) or an already open
        file-like object (used as is, and not closed on failure).
    mode: "r", "w" or "a"; anything else raises RuntimeError.
    compression: ZIP_STORED or ZIP_DEFLATED.  ZIP_DEFLATED raises
        RuntimeError when the zlib module is missing; any other value
        raises RuntimeError as well.
    allowZip64: remembered in self._allowZip64.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression != ZIP_STORED:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                  # Level of printing: 0 through 3
    self.NameToInfo = {}            # Find file info given name
    self.filelist = []              # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self._comment = ''

    # Distinguish a path from a file-like object
    if isinstance(file, basestring):
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = open(file, modeDict[mode])
        except IOError:
            if mode != 'a':
                raise
            # Could not open for update: fall back to creating the file
            mode = key = 'w'
            self.fp = open(file, modeDict[mode])
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    try:
        if key == 'r':
            self._RealGetContents()
        elif key == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        elif key == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipfile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')
    except:
        # Drop our reference, and close the file only if we opened it
        opened = self.fp
        self.fp = None
        if not self._filePassed:
            opened.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000796
def __enter__(self):
    """Enter a ``with`` block; the archive itself is the context value."""
    return self
799
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    The exception details are ignored and nothing is returned, so an
    exception raised inside the block keeps propagating.
    """
    self.close()
802
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-archive record with _EndRecData(), loads the
    whole central directory into memory and creates one ZipInfo per
    entry.  Each ZipInfo is appended to self.filelist and registered in
    self.NameToInfo under its decoded file name.  Also sets
    self._comment and self.start_dir.

    Raises BadZipfile when no end record is found (or _EndRecData
    raises IOError), when a central directory record is truncated, or
    when a record does not start with the expected signature.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except IOError:
        raise BadZipfile("File is not a zip file")
    if not endrec:
        raise BadZipfile, "File is not a zip file"
    if self.debug > 1:
        print endrec
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print "given, inferred, offset", offset_cd, inferred, concat
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # From here on "fp" is an in-memory copy of the central directory,
    # not the archive file itself.
    fp = cStringIO.StringIO(data)
    total = 0
    while total < size_cd:
        # Fixed-size part of the record first, then the three
        # variable-length fields whose lengths it announces.
        centdir = fp.read(sizeCentralDir)
        if len(centdir) != sizeCentralDir:
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if self.debug > 2:
            print centdir
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        # t and d are the packed time and date fields, decoded below
        (x.create_version, x.create_system, x.extract_version, x.reserved,
            x.flag_bits, x.compress_type, t, d,
            x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                        t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        # Shift the stored offset by the size of any data that precedes
        # the archive proper.
        x.header_offset = x.header_offset + concat
        x.filename = x._decodeFilename()
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print "total", total
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000870
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000871
def namelist(self):
    """Return a new list with the file name of every archive member,
    in the order of self.filelist."""
    return [member.filename for member in self.filelist]
878
def infolist(self):
    """Return the list of ZipInfo instances for the archive members.

    This is the archive's own self.filelist object, not a copy.
    """
    members = self.filelist
    return members
883
def printdir(self):
    """Print a table of contents for the zip file.

    One header line, then one line per member with its name,
    modification time and uncompressed size.
    """
    print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"))
    row_format = "%-46s %s %12d"
    for member in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % member.date_time[:6]
        print(row_format % (member.filename, stamp, member.file_size))
890
def testzip(self):
    """Read all the files and check the CRC.

    Every member is read to its end.  Returns the name of the first
    member whose read raises BadZipfile, or None when all members read
    cleanly.
    """
    one_mib = 2 ** 20
    for member in self.filelist:
        try:
            # Drain in bounded chunks, to avoid an OverflowError or a
            # MemoryError with very large embedded files.
            with self.open(member.filename, "r") as stream:
                piece = stream.read(one_mib)
                while piece:        # Check CRC-32
                    piece = stream.read(one_mib)
        except BadZipfile:
            return member.filename
903
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError when self.NameToInfo has no entry for the name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found

    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000912
def setpassword(self, pwd):
    """Store 'pwd' as the default password for encrypted members.

    open() falls back to this value when it is called without a
    password of its own.
    """
    self.pwd = pwd
916
@property
def comment(self):
    """The comment text associated with the ZIP file (self._comment)."""
    archive_comment = self._comment
    return archive_comment
921
@comment.setter
def comment(self, comment):
    # A comment longer than ZIP_MAX_COMMENT is truncated, with a warning
    # attributed to the caller (stacklevel=2).
    too_long = len(comment) > ZIP_MAX_COMMENT
    if too_long:
        import warnings
        warnings.warn('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT, stacklevel=2)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    # Mark the archive as modified so that close() writes the new comment
    self._didModify = True
932
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    name: member name, or a ZipInfo describing the member.
    pwd: password for an encrypted member; passed through to open().

    The member stream returned by open() is closed before returning,
    also when reading fails, so a file handle that open() had to create
    is released right away instead of waiting for garbage collection.
    Exceptions raised by open() or by the read itself propagate.
    """
    with self.open(name, "r", pwd) as member:
        return member.read()
936
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name: member name, or a ZipInfo describing the member.
    mode: "r", "U" or "rU"; handed on to ZipExtFile.
    pwd: password for an encrypted member; self.pwd is used when this
        is empty.

    Raises RuntimeError for an unsupported mode, a closed archive, a
    missing password or a wrong password, and BadZipfile when the local
    file header is truncated, has a bad signature or names a different
    file than the central directory.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    should_close = not self._filePassed
    if should_close:
        zef_file = open(self.filename, 'rb')
    else:
        zef_file = self.fp

    try:
        # Make sure we have an info object
        zinfo = name if isinstance(name, ZipInfo) else self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        raw_header = zef_file.read(sizeFileHeader)
        if len(raw_header) != sizeFileHeader:
            raise BadZipfile("Truncated file header")
        fheader = struct.unpack(structFileHeader, raw_header)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        extra_length = fheader[_FH_EXTRA_FIELD_LENGTH]
        if extra_length:
            zef_file.read(extra_length)

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        zd = None
        if zinfo.flag_bits & 0x1:
            pwd = pwd or self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            enc_header = zef_file.read(12)
            h = [zd(c) for c in enc_header[0:12]]
            if zinfo.flag_bits & 0x8:
                # compare against the file time from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd,
                          close_fileobj=should_close)
    except:
        # Do not leak the handle we opened ourselves
        if should_close:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001015
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a ZipInfo object. You can
       specify a different directory using `path'.

       Returns whatever self._extract_member() returns.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
1029
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
       directory. `path' specifies a different directory to extract to.
       `members' is optional and must be a subset of the list returned
       by namelist().

       Each entry is handed to self.extract() together with `path' and
       `pwd'.
    """
    selected = self.namelist() if members is None else members

    for entry in selected:
        self.extract(entry, path, pwd)
1041
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

       Returns the normalised path that was written (or, for a member
       whose name ends in '/', the directory that now exists).
    """
    sep = os.path.sep

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, sep)

    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    dropped = ('', os.path.curdir, os.path.pardir)
    arcname = sep.join([part for part in arcname.split(sep)
                        if part not in dropped])

    if sep == '\\':
        # filter illegal characters on Windows
        illegal = ':<>|"?*'
        if isinstance(arcname, unicode):
            table = {ord(c): ord('_') for c in illegal}
        else:
            table = string.maketrans(illegal, '_' * len(illegal))
        arcname = arcname.translate(table)
        # remove trailing dots
        trimmed = [part.rstrip('.') for part in arcname.split(sep)]
        arcname = sep.join([part for part in trimmed if part])

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    # A member name ending in '/' is a directory entry: nothing to copy
    if member.filename[-1] == '/':
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    with self.open(member, pwd=pwd) as source, \
         open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1087
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Warns about a duplicate member name.  Raises RuntimeError when the
    archive mode is not "w" or "a", the archive is closed, or the
    compression method cannot be used, and LargeZipFile when the member
    would need ZIP64 extensions although self._allowZip64 is false.
    """
    if zinfo.filename in self.NameToInfo:
        import warnings
        warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    if zinfo.compress_type == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError(
            "That compression method is not supported")

    if self._allowZip64:
        # ZIP64 is permitted, so none of the size limits apply here
        return

    if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
        reason = "Files count"
    elif zinfo.file_size > ZIP64_LIMIT:
        reason = "Filesize"
    elif zinfo.header_offset > ZIP64_LIMIT:
        reason = "Zipfile size"
    else:
        return
    raise LargeZipFile(reason +
                       " would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001115
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    filename: path of the file or directory on disk; it is stat()ed for
        its mode, size and modification time.
    arcname: name to store in the archive, defaulting to filename.  The
        drive is dropped, the path is normalised and leading separators
        are stripped; a directory gets a trailing '/'.
    compress_type: compression method for this member; defaults to
        self.compression.

    Raises RuntimeError when the archive is already closed, or when
    ZIP64 is allowed but no ZIP64 header was reserved and the sizes seen
    while writing exceed ZIP64_LIMIT.  Anything self._writecheck()
    raises propagates as well.
    """
    if not self.fp:
        raise RuntimeError(
              "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # A directory entry is a header only: no data, sizes and CRC zero
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        return

    with open(filename, "rb") as fp:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
                zinfo.file_size * 1.05 > ZIP64_LIMIT
        # Placeholder header; it is rewritten once the real values are known
        self.fp.write(zinfo.FileHeader(zip64))
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
        else:
            cmpr = None
        file_size = 0
        # Stream the file in 8 KiB pieces, updating CRC and sizes as we go
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    if cmpr:
        # Emit whatever the compressor is still holding back
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    if not zip64 and self._allowZip64:
        # The header was reserved without ZIP64 fields, so the final
        # sizes must still fit the non-ZIP64 limit.
        if file_size > ZIP64_LIMIT:
            raise RuntimeError('File size has increased during compressing')
        if compress_size > ZIP64_LIMIT:
            raise RuntimeError('Compressed size larger than uncompressed size')
    # Seek backwards and write file header (which will now include
    # correct CRC and file sizes)
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset, 0)
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1204
def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo is created with the current
    local time, the archive's default compression and permission bits
    0600.  'compress_type', when not None, overrides the compression
    method in either case.

    Raises RuntimeError when the archive is already closed and
    LargeZipFile when the data needs ZIP64 but self._allowZip64 is
    false.  Anything self._writecheck() raises propagates as well.
    """
    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        now = time.localtime(time.time())[:6]
        zinfo = ZipInfo(filename=zinfo_or_arcname, date_time=now)
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
              "Attempt to write to ZIP archive that was already closed")

    if compress_type is not None:
        zinfo.compress_type = compress_type

    payload = bytes
    zinfo.file_size = len(payload)              # Uncompressed size
    zinfo.header_offset = self.fp.tell()        # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(payload) & 0xffffffff     # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
        payload = co.compress(payload) + co.flush()
        zinfo.compress_size = len(payload)      # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size

    zip64 = (zinfo.file_size > ZIP64_LIMIT or
             zinfo.compress_size > ZIP64_LIMIT)
    if zip64 and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")

    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.write(payload)
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        if zip64:
            fmt = '<LQQ'
        else:
            fmt = '<LLL'
        self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    self.fp.flush()
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1251
def __del__(self):
    """Finaliser: call close() in case the user forgot to."""
    self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001255
1256 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001257 """Close the file, and for mode "w" and "a" write the ending
1258 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001259 if self.fp is None:
1260 return
Tim Petersa608bb22006-06-15 18:06:29 +00001261
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001262 try:
1263 if self.mode in ("w", "a") and self._didModify: # write ending records
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001264 pos1 = self.fp.tell()
1265 for zinfo in self.filelist: # write central directory
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001266 dt = zinfo.date_time
1267 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1268 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1269 extra = []
1270 if zinfo.file_size > ZIP64_LIMIT \
1271 or zinfo.compress_size > ZIP64_LIMIT:
1272 extra.append(zinfo.file_size)
1273 extra.append(zinfo.compress_size)
1274 file_size = 0xffffffff
1275 compress_size = 0xffffffff
1276 else:
1277 file_size = zinfo.file_size
1278 compress_size = zinfo.compress_size
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001279
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001280 if zinfo.header_offset > ZIP64_LIMIT:
1281 extra.append(zinfo.header_offset)
1282 header_offset = 0xffffffffL
1283 else:
1284 header_offset = zinfo.header_offset
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001285
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001286 extra_data = zinfo.extra
1287 if extra:
1288 # Append a ZIP64 field to the extra's
1289 extra_data = struct.pack(
1290 '<HH' + 'Q'*len(extra),
1291 1, 8*len(extra), *extra) + extra_data
Tim Petersa608bb22006-06-15 18:06:29 +00001292
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001293 extract_version = max(45, zinfo.extract_version)
1294 create_version = max(45, zinfo.create_version)
1295 else:
1296 extract_version = zinfo.extract_version
1297 create_version = zinfo.create_version
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001298
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001299 try:
1300 filename, flag_bits = zinfo._encodeFilenameFlags()
1301 centdir = struct.pack(structCentralDir,
1302 stringCentralDir, create_version,
1303 zinfo.create_system, extract_version, zinfo.reserved,
1304 flag_bits, zinfo.compress_type, dostime, dosdate,
1305 zinfo.CRC, compress_size, file_size,
1306 len(filename), len(extra_data), len(zinfo.comment),
1307 0, zinfo.internal_attr, zinfo.external_attr,
1308 header_offset)
1309 except DeprecationWarning:
1310 print >>sys.stderr, (structCentralDir,
1311 stringCentralDir, create_version,
1312 zinfo.create_system, extract_version, zinfo.reserved,
1313 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1314 zinfo.CRC, compress_size, file_size,
1315 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1316 0, zinfo.internal_attr, zinfo.external_attr,
1317 header_offset)
1318 raise
1319 self.fp.write(centdir)
1320 self.fp.write(filename)
1321 self.fp.write(extra_data)
1322 self.fp.write(zinfo.comment)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001323
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001324 pos2 = self.fp.tell()
1325 # Write end-of-zip-archive record
Serhiy Storchaka45efb222014-09-23 21:33:52 +03001326 centDirCount = len(self.filelist)
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001327 centDirSize = pos2 - pos1
1328 centDirOffset = pos1
Serhiy Storchaka45efb222014-09-23 21:33:52 +03001329 requires_zip64 = None
1330 if centDirCount > ZIP_FILECOUNT_LIMIT:
1331 requires_zip64 = "Files count"
1332 elif centDirOffset > ZIP64_LIMIT:
1333 requires_zip64 = "Central directory offset"
1334 elif centDirSize > ZIP64_LIMIT:
1335 requires_zip64 = "Central directory size"
1336 if requires_zip64:
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001337 # Need to write the ZIP64 end-of-archive records
Serhiy Storchaka45efb222014-09-23 21:33:52 +03001338 if not self._allowZip64:
1339 raise LargeZipFile(requires_zip64 +
1340 " would require ZIP64 extensions")
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001341 zip64endrec = struct.pack(
1342 structEndArchive64, stringEndArchive64,
1343 44, 45, 45, 0, 0, centDirCount, centDirCount,
1344 centDirSize, centDirOffset)
1345 self.fp.write(zip64endrec)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001346
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001347 zip64locrec = struct.pack(
1348 structEndArchive64Locator,
1349 stringEndArchive64Locator, 0, pos2, 1)
1350 self.fp.write(zip64locrec)
1351 centDirCount = min(centDirCount, 0xFFFF)
1352 centDirSize = min(centDirSize, 0xFFFFFFFF)
1353 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001354
Antoine Pitrou02512fb2012-11-17 23:56:53 +01001355 endrec = struct.pack(structEndArchive, stringEndArchive,
1356 0, 0, centDirCount, centDirCount,
1357 centDirSize, centDirOffset, len(self._comment))
1358 self.fp.write(endrec)
1359 self.fp.write(self._comment)
1360 self.fp.flush()
1361 finally:
1362 fp = self.fp
1363 self.fp = None
1364 if not self._filePassed:
1365 fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001366
1367
class PyZipFile(ZipFile):
    """ZipFile variant that archives compiled Python modules and packages."""

    def writepy(self, pathname, basename=""):
        """Add the compiled module(s) found at "pathname" to the archive.

        Three kinds of "pathname" are handled:

        * a package directory (one containing __init__.py): its modules
          and its sub-packages are added recursively, with the directory
          name appended to "basename" as the archive prefix;
        * any other directory: every *.py entry directly inside it is
          added under "basename";
        * anything else is treated as a single module file, whose name
          must end in ".py" (RuntimeError is raised otherwise).

        The archived file is always the module's .pyo or .pyc file;
        _get_codename() compiles the source first when needed.
        """
        if not os.path.isdir(pathname):
            # Single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file %s" % (arcname,))
            self.write(fname, arcname)
            return

        init_py = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(init_py):
            # Plain (non-package) directory: its modules keep the
            # caller's prefix and sub-directories are not visited.
            if self.debug:
                print("Adding files from directory %s" % (pathname,))
            for entry in os.listdir(pathname):
                if os.path.splitext(entry)[1] != ".py":
                    continue
                entry_path = os.path.join(pathname, entry)
                fname, arcname = self._get_codename(entry_path[0:-3],
                                                    basename)
                if self.debug:
                    print("Adding %s" % (arcname,))
                self.write(fname, arcname)
            return

        # Package directory: extend the archive prefix with its name.
        pkg_name = os.path.split(pathname)[1]
        basename = "%s/%s" % (basename, pkg_name) if basename else pkg_name
        if self.debug:
            print("Adding package in %s as %s" % (pathname, basename))

        # __init__ goes in first ...
        fname, arcname = self._get_codename(init_py[0:-3], basename)
        if self.debug:
            print("Adding %s" % (arcname,))
        self.write(fname, arcname)

        # ... followed by the remaining modules and the sub-packages.
        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        for entry in entries:
            entry_path = os.path.join(pathname, entry)
            if os.path.isdir(entry_path):
                # Only directories that are themselves packages are
                # descended into; other directories are ignored.
                if os.path.isfile(os.path.join(entry_path, "__init__.py")):
                    self.writepy(entry_path, basename)
            elif os.path.splitext(entry)[1] == ".py":
                fname, arcname = self._get_codename(entry_path[0:-3],
                                                    basename)
                if self.debug:
                    print("Adding %s" % (arcname,))
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the module at "pathname".

        "pathname" is the module path without its extension.  The
        returned filename is the bytecode file to store: the .pyo file
        if it exists and is at least as new as the .py source, otherwise
        the .pyc file, which is (re)compiled first when it is missing or
        older than the source.  The archive name is the bytecode file's
        base name, prefixed with "basename/" when basename is non-empty.
        For example, /python/lib/string with an empty basename gives
        (/python/lib/string.pyc, string.pyc).
        """
        source = pathname + ".py"
        compiled = pathname + ".pyc"
        optimized = pathname + ".pyo"

        def up_to_date(candidate):
            # True when the bytecode file exists and is not older than
            # the source; the source is only stat'ed if the file exists.
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(source).st_mtime)

        if up_to_date(optimized):
            fname = optimized
        else:
            if not up_to_date(compiled):
                import py_compile
                if self.debug:
                    print("Compiling %s" % (source,))
                try:
                    py_compile.compile(source, compiled, None, True)
                except py_compile.PyCompileError as err:
                    # A compile error is reported but not propagated;
                    # the .pyc path is still returned to the caller.
                    print(err.msg)
            fname = compiled

        archivename = os.path.basename(fname)
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001465
1466
def main(args=None):
    """Command-line interface: list, test, extract or create a ZIP file.

    "args" is the argument list without the program name; when it is
    None, sys.argv[1:] is used.  The first argument selects the action:

    -l zipfile           print the archive listing (ZipFile.printdir)
    -t zipfile           run ZipFile.testzip and report the member it
                         returns, if any
    -e zipfile target    extract every member into the "target" directory
    -c zipfile src ...   create the archive from the given files and
                         directory trees, using ZIP_DEFLATED

    If the action is missing or unknown, or the number of arguments does
    not fit the action, the usage text is printed and sys.exit(1) is
    called.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Store a regular file as "zippath"; walk a directory
            # recursively, extending "zippath" with each entry name.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w', allowZip64=True) as zf:
            for src in args[2:]:
                zippath = os.path.basename(src)
                if not zippath:
                    # "src" ends with a path separator ("dir/"), which
                    # makes basename() empty; use the name of the
                    # directory itself so it is kept as archive prefix.
                    zippath = os.path.basename(os.path.dirname(src))
                if zippath in ('', os.curdir, os.pardir):
                    # No usable directory name ("/", "." or ".."):
                    # store the contents at the archive root.
                    zippath = ''
                addToZip(zf, src, zippath)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001525
# Run the command-line interface only when this file is executed as a
# script; importing the module does not call main().
if __name__ == "__main__":
    main()