blob: 31000accbb55992faa2c965cf95c3fc04b78f03d [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00005import binascii, cStringIO, stat
Antoine Pitrou94c33eb2010-01-27 20:59:50 +00006import io
7import re
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
# zlib is an optional dependency.  When it is missing, ``zlib`` is set to
# None and CRC-32 values are computed with binascii instead; ZIP_DEFLATED
# (which requires zlib) is then unavailable.
try:
    import zlib # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    zlib = None
    crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
# Names exported by a star-import of this module.
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
class BadZipfile(Exception):
    """Raised by this module for malformed or unsupported ZIP data.

    Within this file it is raised for archives that span multiple disks
    and for members whose CRC-32 does not match the stored value.
    """
    pass
Ronald Oussoren143cefb2006-06-15 08:14:18 +000021
22
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile would require ZIP64 extensions
    but those extensions are disabled.
    """
28
error = BadZipfile      # The exception raised by this module

# Member sizes above ZIP64_LIMIT cannot be written into a classic header and
# make ZipInfo.FileHeader() switch to (or demand) the ZIP64 extension.
ZIP64_LIMIT = (1 << 31) - 1
# NOTE(review): presumably the entry count from which ZIP64 is needed; the
# code that consults this limit is not in this part of the file -- confirm.
ZIP_FILECOUNT_LIMIT = 1 << 16
# The archive comment length is stored in a 16-bit field.
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
39
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)
#
# All formats are little-endian ("<").  The integer constants that follow each
# format are indices into the tuple returned by struct.unpack() for it.

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData appends the comment and the record's file offset to the list).
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
# (unpacked by _EndRecData64 as: signature, disk number, relative offset,
# total number of disks)
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
132
def _check_zipfile(fp):
    """Return True if the open file *fp* ends with a valid ZIP end record.

    An IOError raised while looking for the record is treated as
    "not a ZIP file" and yields False.
    """
    try:
        end_record = _EndRecData(fp)
    except IOError:
        return False
    # A non-empty end record means the magic number was found.
    return bool(end_record)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000140
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    *filename* may be a path, or an already opened file / file-like object
    (anything with a ``read`` attribute).  Paths are opened in binary mode
    and closed again before returning.  Any IOError, including failure to
    open the path, leaves the result as computed so far (False by default).
    """
    looks_like_zip = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as fp:
                looks_like_zip = _check_zipfile(fp)
        else:
            # Caller-supplied file object: inspect it, but do not close it.
            looks_like_zip = _check_zipfile(fp=filename)
    except IOError:
        pass
    return looks_like_zip
156
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin is the open archive, offset is the (negative) position of the
    classic "end of central directory" record relative to the end of the
    file, and endrec is the list built from that record.  endrec is returned
    unchanged when no ZIP64 locator/record is found, and with its first seven
    fields overwritten from the ZIP64 record otherwise.  BadZipfile is raised
    for archives that span multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    magic, disk_with_record, _rel_offset, total_disks = \
            struct.unpack(structEndArchive64Locator, locator)
    if magic != stringEndArchive64Locator:
        return endrec

    if not (disk_with_record == 0 and total_disks == 1):
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data': the ZIP64 record is then located
    # immediately before its locator.
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    zip64 = struct.unpack(structEndArchive64, record)
    if zip64[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    field_map = (
        (_ECD_SIGNATURE, _CD64_SIGNATURE),
        (_ECD_DISK_NUMBER, _CD64_DISK_NUMBER),
        (_ECD_DISK_START, _CD64_DISK_NUMBER_START),
        (_ECD_ENTRIES_THIS_DISK, _CD64_NUMBER_ENTRIES_THIS_DISK),
        (_ECD_ENTRIES_TOTAL, _CD64_NUMBER_ENTRIES_TOTAL),
        (_ECD_SIZE, _CD64_DIRECTORY_SIZE),
        (_ECD_OFFSET, _CD64_OFFSET_START_CENTDIR),
    )
    for ecd_index, cd64_index in field_map:
        endrec[ecd_index] = zip64[cd64_index]
    return endrec
198
199
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list holding the unpacked fields of the ZIP "End of
    central dir" record, followed by the archive comment and, as last item,
    the file offset at which the record starts.  The list is passed through
    _EndRecData64 so that ZIP64 values, when present, replace the classic
    ones."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # First try the cheap case: no archive comment, so the "end of central
    # directory" structure is the very last thing in the file.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    tail = fpin.read()
    if (len(tail) == sizeEndCentDir and
            tail.startswith(stringEndArchive) and
            tail.endswith(b"\000\000")):
        # Signature matches and the comment length is zero: unpack, then
        # append a blank comment and the record's start offset.
        endrec = list(struct.unpack(structEndArchive, tail))
        endrec += ["", filesize - sizeEndCentDir]

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    window = fpin.read()
    start = window.rfind(stringEndArchive)
    if start < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    record_end = start + sizeEndCentDir
    recData = window[start:record_end]
    if len(recData) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, recData))
    commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    endrec.append(window[record_end:record_end + commentSize])
    record_pos = maxCommentStart + start
    endrec.append(record_pos)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, record_pos - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000259
Fred Drake484d7352000-10-02 21:14:52 +0000260
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename  -- member name; it is cut at the first null byte and
                     os.sep is replaced by "/".
        date_time -- (year, month, day, hour, min, sec) tuple.

        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self, zip64=None):
        """Return the per-file header as a string.

        zip64 -- None: add the ZIP64 extra record only when a size exceeds
                 ZIP64_LIMIT; true: always add it; false: never add it.

        Raises LargeZipFile when a size exceeds ZIP64_LIMIT but zip64 is
        false.  When the ZIP64 sizes are used, extract_version and
        create_version are raised to at least 45 as a side effect.
        """
        dt = self.date_time
        # Pack the timestamp into the 16-bit DOS date and time fields
        # (seconds are stored with two-second resolution).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        too_large = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64 is None:
            zip64 = too_large
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
        if too_large:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Each version field is bumped from its own previous value, so a
            # create_version that is already higher is never lowered.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        Unicode names are written as ASCII when possible; otherwise they are
        encoded as UTF-8 and flag bit 0x800 is set.  Byte-string names are
        passed through unchanged.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply the ZIP64 record(s) found in self.extra to this object.

        The extra field is a sequence of (type, length, payload) records.
        For type 1 (ZIP64) the payload holds 64-bit replacements, in order,
        for whichever of file_size, compress_size and header_offset carry
        the "see ZIP64" marker value.  Other record types are skipped.

        Raises RuntimeError if a ZIP64 record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # A record header needs 4 bytes; fewer trailing bytes cannot form a
        # record and are ignored instead of crashing inside struct.unpack.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            # Advance past this record's 4-byte header and its payload.
            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000417
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000418
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    The object keeps three 32-bit keys that are seeded from the password
    and updated with every decrypted character, so characters must be fed
    in stream order.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for byte in range(256):
            value = byte
            for _bit in range(8):
                shifted = (value >> 1) & 0x7FFFFFFF
                value = (shifted ^ poly) if value & 1 else shifted
            table.append(value)
        return table
    # Built once, at class creation time, and shared by all instances.
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ord(ch)) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        """Seed the three keys and mix in every character of *pwd*."""
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for char in pwd:
            self._UpdateKeys(char)

    def _UpdateKeys(self, c):
        """Advance the key state with the plaintext character *c*."""
        self.key0 = self._crc32(c, self.key0)
        mixed = (self.key1 + (self.key0 & 255)) & 0xFFFFFFFF
        self.key1 = (mixed * 134775813 + 1) & 0xFFFFFFFF
        top_byte = chr((self.key1 >> 24) & 255)
        self.key2 = self._crc32(top_byte, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        k = self.key2 | 2
        keystream_byte = ((k * (k ^ 1)) >> 8) & 255
        plain = chr(ord(c) ^ keystream_byte)
        # The keys are updated with the decrypted (plaintext) character.
        self._UpdateKeys(plain)
        return plain
477
Ezio Melotti9e949722012-11-18 13:18:06 +0200478
# Human-readable names for ZIP compression method numbers.  ZipExtFile uses
# this table only to build the NotImplementedError message for a member whose
# method is neither ZIP_STORED nor ZIP_DEFLATED.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
498
499
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000500class ZipExtFile(io.BufferedIOBase):
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000501 """File-like object for reading an archive member.
Tim Petersea5962f2007-03-12 18:07:52 +0000502 Is returned by ZipFile.open().
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000503 """
Tim Petersea5962f2007-03-12 18:07:52 +0000504
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000505 # Max size supported by decompressor.
506 MAX_N = 1 << 31 - 1
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000507
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000508 # Read from compressed files in 4k blocks.
509 MIN_READ_SIZE = 4096
Tim Petersea5962f2007-03-12 18:07:52 +0000510
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000511 # Search for universal newlines or line chunks.
512 PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
513
Jesus Cea93d628b2012-11-04 02:32:08 +0100514 def __init__(self, fileobj, mode, zipinfo, decrypter=None,
515 close_fileobj=False):
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000516 self._fileobj = fileobj
517 self._decrypter = decrypter
Jesus Cea93d628b2012-11-04 02:32:08 +0100518 self._close_fileobj = close_fileobj
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000519
Ezio Melotti4611b052010-01-28 01:41:30 +0000520 self._compress_type = zipinfo.compress_type
521 self._compress_size = zipinfo.compress_size
522 self._compress_left = zipinfo.compress_size
523
524 if self._compress_type == ZIP_DEFLATED:
525 self._decompressor = zlib.decompressobj(-15)
Ezio Melotti9e949722012-11-18 13:18:06 +0200526 elif self._compress_type != ZIP_STORED:
527 descr = compressor_names.get(self._compress_type)
528 if descr:
529 raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr))
530 else:
531 raise NotImplementedError("compression type %d" % (self._compress_type,))
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000532 self._unconsumed = ''
533
534 self._readbuffer = ''
535 self._offset = 0
536
537 self._universal = 'U' in mode
538 self.newlines = None
539
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000540 # Adjust read size for encrypted files since the first 12 bytes
541 # are for the encryption/password information.
542 if self._decrypter is not None:
543 self._compress_left -= 12
544
545 self.mode = mode
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000546 self.name = zipinfo.filename
547
Antoine Pitroue1436d12010-08-12 15:25:51 +0000548 if hasattr(zipinfo, 'CRC'):
549 self._expected_crc = zipinfo.CRC
550 self._running_crc = crc32(b'') & 0xffffffff
551 else:
552 self._expected_crc = None
553
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000554 def readline(self, limit=-1):
555 """Read and return a line from the stream.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000556
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000557 If limit is specified, at most limit bytes will be read.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000558 """
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000559
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000560 if not self._universal and limit < 0:
561 # Shortcut common case - newline found in buffer.
562 i = self._readbuffer.find('\n', self._offset) + 1
563 if i > 0:
564 line = self._readbuffer[self._offset: i]
565 self._offset = i
566 return line
Tim Petersea5962f2007-03-12 18:07:52 +0000567
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000568 if not self._universal:
569 return io.BufferedIOBase.readline(self, limit)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000570
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000571 line = ''
572 while limit < 0 or len(line) < limit:
573 readahead = self.peek(2)
574 if readahead == '':
575 return line
Tim Petersea5962f2007-03-12 18:07:52 +0000576
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000577 #
578 # Search for universal newlines or line chunks.
579 #
580 # The pattern returns either a line chunk or a newline, but not
581 # both. Combined with peek(2), we are assured that the sequence
582 # '\r\n' is always retrieved completely and never split into
583 # separate newlines - '\r', '\n' due to coincidental readaheads.
584 #
585 match = self.PATTERN.search(readahead)
586 newline = match.group('newline')
587 if newline is not None:
588 if self.newlines is None:
589 self.newlines = []
590 if newline not in self.newlines:
591 self.newlines.append(newline)
592 self._offset += len(newline)
593 return line + '\n'
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000594
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000595 chunk = match.group('chunk')
596 if limit >= 0:
597 chunk = chunk[: limit - len(line)]
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000598
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000599 self._offset += len(chunk)
600 line += chunk
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000601
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000602 return line
603
604 def peek(self, n=1):
605 """Returns buffered bytes without advancing the position."""
606 if n > len(self._readbuffer) - self._offset:
607 chunk = self.read(n)
608 self._offset -= len(chunk)
609
610 # Return up to 512 bytes to reduce allocation overhead for tight loops.
611 return self._readbuffer[self._offset: self._offset + 512]
612
613 def readable(self):
614 return True
615
616 def read(self, n=-1):
617 """Read and return up to n bytes.
618 If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000619 """
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000620 buf = ''
Antoine Pitroue4195e82010-09-12 14:56:27 +0000621 if n is None:
622 n = -1
623 while True:
624 if n < 0:
625 data = self.read1(n)
626 elif n > len(buf):
627 data = self.read1(n - len(buf))
628 else:
629 return buf
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000630 if len(data) == 0:
631 return buf
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000632 buf += data
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000633
Antoine Pitroue1436d12010-08-12 15:25:51 +0000634 def _update_crc(self, newdata, eof):
635 # Update the CRC using the given data.
636 if self._expected_crc is None:
637 # No need to compute the CRC if we don't have a reference value
638 return
639 self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
640 # Check the CRC if we're at the end of the file
641 if eof and self._running_crc != self._expected_crc:
642 raise BadZipfile("Bad CRC-32 for file %r" % self.name)
643
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000644 def read1(self, n):
645 """Read up to n bytes with at most one read() system call."""
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000646
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000647 # Simplify algorithm (branching) by transforming negative n to large n.
648 if n < 0 or n is None:
649 n = self.MAX_N
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000650
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000651 # Bytes available in read buffer.
652 len_readbuffer = len(self._readbuffer) - self._offset
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000653
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000654 # Read from file.
655 if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
656 nbytes = n - len_readbuffer - len(self._unconsumed)
657 nbytes = max(nbytes, self.MIN_READ_SIZE)
658 nbytes = min(nbytes, self._compress_left)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000659
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000660 data = self._fileobj.read(nbytes)
661 self._compress_left -= len(data)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000662
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000663 if data and self._decrypter is not None:
664 data = ''.join(map(self._decrypter, data))
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000665
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000666 if self._compress_type == ZIP_STORED:
Antoine Pitroue1436d12010-08-12 15:25:51 +0000667 self._update_crc(data, eof=(self._compress_left==0))
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000668 self._readbuffer = self._readbuffer[self._offset:] + data
669 self._offset = 0
670 else:
671 # Prepare deflated bytes for decompression.
672 self._unconsumed += data
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000673
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000674 # Handle unconsumed data.
Ezio Melotti4611b052010-01-28 01:41:30 +0000675 if (len(self._unconsumed) > 0 and n > len_readbuffer and
676 self._compress_type == ZIP_DEFLATED):
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000677 data = self._decompressor.decompress(
678 self._unconsumed,
679 max(n - len_readbuffer, self.MIN_READ_SIZE)
680 )
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000681
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000682 self._unconsumed = self._decompressor.unconsumed_tail
Antoine Pitroue1436d12010-08-12 15:25:51 +0000683 eof = len(self._unconsumed) == 0 and self._compress_left == 0
684 if eof:
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000685 data += self._decompressor.flush()
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000686
Antoine Pitroue1436d12010-08-12 15:25:51 +0000687 self._update_crc(data, eof=eof)
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000688 self._readbuffer = self._readbuffer[self._offset:] + data
689 self._offset = 0
690
691 # Read from buffer.
692 data = self._readbuffer[self._offset: self._offset + n]
693 self._offset += len(data)
694 return data
695
def close(self):
    """Close this member file object.

    The wrapped file object (self._fileobj) is closed only when
    self._close_fileobj is true.  The base-class close() is run from the
    ``finally`` clause, so it executes even if closing the wrapped file
    raises.
    """
    try :
        if self._close_fileobj:
            self._fileobj.close()
    finally:
        # Always finish closing this object itself, whatever happened above.
        super(ZipExtFile, self).close()
Tim Petersea5962f2007-03-12 18:07:52 +0000702
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000703
class ZipFile(object):
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.

    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Raises RuntimeError for an unknown mode or compression method, or
        when ZIP_DEFLATED is requested but zlib could not be imported.
        If reading the existing archive fails, a file opened here (i.e.
        'file' was a path) is closed before the exception propagates.
        """
        if mode not in ("r", "w", "a"):
            raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                    "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key = mode.replace('b', '')[0]
        self.pwd = None
        self._comment = ''

        # Check if we were passed a file-like object
        if isinstance(file, basestring):
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            try:
                self.fp = open(file, modeDict[mode])
            except IOError:
                if mode == 'a':
                    # 'r+b' failed: fall back to creating the file.  Note
                    # that self.mode keeps the value "a"; only the local
                    # dispatch key switches to "w".
                    mode = key = 'w'
                    self.fp = open(file, modeDict[mode])
                else:
                    raise
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        try:
            if key == 'r':
                self._RealGetContents()
            elif key == 'w':
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
            elif key == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir, 0)
                except BadZipfile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
            else:
                raise RuntimeError('Mode must be "r", "w" or "a"')
        except:
            # Deliberately catch everything: this only releases the file
            # handle and then re-raises the original exception unchanged.
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()
            raise

    def __enter__(self):
        """Context-manager entry: return the ZipFile itself."""
        return self

    def __exit__(self, type, value, traceback):
        """Context-manager exit: close the archive; exceptions propagate."""
        self.close()

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist / self.NameToInfo with one ZipInfo per central
        directory entry and sets self._comment and self.start_dir.
        Raises BadZipfile when no end-of-archive record is found or the
        central directory is truncated or carries a wrong signature.
        """
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except IOError:
            raise BadZipfile("File is not a zip file")
        if not endrec:
            raise BadZipfile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        # From here on 'fp' is an in-memory copy of the central directory.
        fp = cStringIO.StringIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipfile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            x.header_offset = x.header_offset + concat
            x.filename = x._decodeFilename()
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            if self.debug > 2:
                print("total %s" % (total,))

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive.  This is the internal list itself, not a copy."""
        return self.filelist

    def printdir(self):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Returns the name of the first member whose reading raises
        BadZipfile, or None when every member reads cleanly.
        """
        chunk_size = 2 ** 20
        for zinfo in self.filelist:
            try:
                # Read by chunks, to avoid an OverflowError or a
                # MemoryError with very large embedded files.
                with self.open(zinfo.filename, "r") as f:
                    while f.read(chunk_size):     # Check CRC-32
                        pass
            except BadZipfile:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError when the archive has no member with that name.
        """
        info = self.NameToInfo.get(name)
        if info is None:
            raise KeyError(
                'There is no item named %r in the archive' % name)

        return info

    def setpassword(self, pwd):
        """Set default password for encrypted files."""
        self.pwd = pwd

    @property
    def comment(self):
        """The comment text associated with the ZIP file."""
        return self._comment

    @comment.setter
    def comment(self, comment):
        # check for valid comment length; a comment of exactly
        # ZIP_MAX_COMMENT bytes still fits and must not be reported
        if len(comment) > ZIP_MAX_COMMENT:
            if self.debug:
                print('Archive comment is too long; truncating to %d bytes'
                      % ZIP_MAX_COMMENT)
            comment = comment[:ZIP_MAX_COMMENT]
        self._comment = comment
        self._didModify = True

    def read(self, name, pwd=None):
        """Return file bytes (as a string) for name.

        'name' may be a member name or a ZipInfo; 'pwd' overrides the
        default password for this call.
        """
        # Close the member object explicitly: when the archive was opened
        # by path, open() creates a separate OS-level file handle for it.
        with self.open(name, "r", pwd) as member:
            return member.read()

    def open(self, name, mode="r", pwd=None):
        """Return file-like object for 'name'.

        name: member name or ZipInfo instance.
        mode: "r", "U" or "rU".
        pwd:  password for an encrypted member; falls back to self.pwd.

        Raises RuntimeError for a bad mode, a closed archive, a missing
        password or a failed password check, and BadZipfile when the local
        file header is truncated, has a wrong signature, or names a
        different file than the central directory.
        """
        if mode not in ("r", "U", "rU"):
            raise RuntimeError('open() requires mode "r", "U", or "rU"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")

        # Only open a new file for instances where we were not
        # given a file object in the constructor
        if self._filePassed:
            zef_file = self.fp
            should_close = False
        else:
            zef_file = open(self.filename, 'rb')
            should_close = True

        try:
            # Make sure we have an info object
            if isinstance(name, ZipInfo):
                # 'name' is already an info object
                zinfo = name
            else:
                # Get info object for name
                zinfo = self.getinfo(name)

            zef_file.seek(zinfo.header_offset, 0)

            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if len(fheader) != sizeFileHeader:
                raise BadZipfile("Truncated file header")
            fheader = struct.unpack(structFileHeader, fheader)
            if fheader[_FH_SIGNATURE] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")

            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            if fname != zinfo.orig_filename:
                raise BadZipfile(
                    'File name in directory "%s" and header "%s" differ.' % (
                        zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            zd = None
            if is_encrypted:
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %s is encrypted, "
                                       "password required for extraction" % name)

                zd = _ZipDecrypter(pwd)
                # The first 12 bytes in the cypher stream is an encryption header
                #  used to strengthen the algorithm. The first 11 bytes are
                #  completely random, while the 12th contains the MSB of the CRC,
                #  or the MSB of the file time depending on the header type
                #  and is used to check the correctness of the password.
                header = zef_file.read(12)
                h = map(zd, header[0:12])
                if zinfo.flag_bits & 0x8:
                    # compare against the file type from extended local headers
                    check_byte = (zinfo._raw_time >> 8) & 0xff
                else:
                    # compare against the CRC otherwise
                    check_byte = (zinfo.CRC >> 24) & 0xff
                if ord(h[11]) != check_byte:
                    raise RuntimeError("Bad password for file", name)

            return ZipExtFile(zef_file, mode, zinfo, zd,
                              close_fileobj=should_close)
        except:
            # Cleanup-and-re-raise: do not leak the handle opened above.
            if should_close:
                zef_file.close()
            raise

    def extract(self, member, path=None, pwd=None):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a ZipInfo object. You can
           specify a different directory using `path'.

           Returns the path of the created file or directory.
        """
        if not isinstance(member, ZipInfo):
            member = self.getinfo(member)

        if path is None:
            path = os.getcwd()

        return self._extract_member(member, path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all members from the archive to the current working
           directory. `path' specifies a different directory to extract to.
           `members' is optional and must be a subset of the list returned
           by namelist().
        """
        if members is None:
            members = self.namelist()

        for zipinfo in members:
            self.extract(zipinfo, path, pwd)

    def _extract_member(self, member, targetpath, pwd):
        """Extract the ZipInfo object 'member' to a physical
           file on the path targetpath.

           Returns the normalised path that was written (or, for a
           directory entry, created).
        """
        # build the destination pathname, replacing
        # forward slashes to platform specific separators.
        arcname = member.filename.replace('/', os.path.sep)

        if os.path.altsep:
            arcname = arcname.replace(os.path.altsep, os.path.sep)
        # interpret absolute pathname as relative, remove drive letter or
        # UNC path, redundant separators, "." and ".." components.
        arcname = os.path.splitdrive(arcname)[1]
        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                                   if x not in ('', os.path.curdir, os.path.pardir))
        # filter illegal characters on Windows
        if os.path.sep == '\\':
            illegal = ':<>|"?*'
            # Python 2 has no str.maketrans; the byte-string table comes
            # from the string module, while unicode.translate() needs a
            # mapping of code points instead of a 256-byte table.
            if isinstance(arcname, unicode):
                table = dict((ord(c), ord('_')) for c in illegal)
            else:
                import string
                table = string.maketrans(illegal, '_' * len(illegal))
            arcname = arcname.translate(table)

        targetpath = os.path.join(targetpath, arcname)
        targetpath = os.path.normpath(targetpath)

        # Create all upper directories if necessary.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            os.makedirs(upperdirs)

        # A trailing slash in the archive name marks a directory entry.
        if member.filename[-1] == '/':
            if not os.path.isdir(targetpath):
                os.mkdir(targetpath)
            return targetpath

        with self.open(member, pwd=pwd) as source, \
             open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)

        return targetpath

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        Raises RuntimeError for a read-only or closed archive or an
        unusable compression method, and LargeZipFile when the size or
        header offset exceeds ZIP64_LIMIT while ZIP64 is not allowed.
        A duplicate name is only reported (when self.debug is set).
        """
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % (zinfo.filename,))
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                "That compression method is not supported")
        if zinfo.file_size > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
        if zinfo.header_offset > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Zipfile size would require ZIP64 extensions")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        arcname defaults to filename; its drive and leading separators are
        stripped.  compress_type defaults to the archive's compression.
        A directory is stored as an empty entry whose name ends in '/'.
        """
        if not self.fp:
            raise RuntimeError(
                "Attempt to write to ZIP archive that was already closed")

        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        zinfo.flag_bits = 0x00
        zinfo.header_offset = self.fp.tell()    # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            self.fp.write(zinfo.FileHeader(False))
            return

        with open(filename, "rb") as fp:
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            # Compressed size can be larger than uncompressed size
            zip64 = self._allowZip64 and \
                    zinfo.file_size * 1.05 > ZIP64_LIMIT
            self.fp.write(zinfo.FileHeader(zip64))
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                        zlib.DEFLATED, -15)
            else:
                cmpr = None
            file_size = 0
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = crc32(buf, CRC) & 0xffffffff
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        if not zip64 and self._allowZip64:
            # The header was written without ZIP64 fields, so the real
            # sizes must still fit in the classic 32-bit fields.
            if file_size > ZIP64_LIMIT:
                raise RuntimeError('File size has increased during compressing')
            if compress_size > ZIP64_LIMIT:
                raise RuntimeError('Compressed size larger than uncompressed size')
        # Seek backwards and write file header (which will now include
        # correct CRC and file sizes)
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset, 0)
        self.fp.write(zinfo.FileHeader(zip64))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
        """Write a file into the archive.  The contents is the string
        'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive.

        compress_type, when given, overrides the compression of the entry.
        Raises LargeZipFile when ZIP64 would be needed but is not allowed.
        """
        if not isinstance(zinfo_or_arcname, ZipInfo):
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])

            zinfo.compress_type = self.compression
            zinfo.external_attr = 0o600 << 16
        else:
            zinfo = zinfo_or_arcname

        if not self.fp:
            raise RuntimeError(
                "Attempt to write to ZIP archive that was already closed")

        if compress_type is not None:
            zinfo.compress_type = compress_type

        zinfo.file_size = len(bytes)            # Uncompressed size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self._writecheck(zinfo)
        self._didModify = True
        zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                  zlib.DEFLATED, -15)
            bytes = co.compress(bytes) + co.flush()
            zinfo.compress_size = len(bytes)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        zip64 = zinfo.file_size > ZIP64_LIMIT or \
                zinfo.compress_size > ZIP64_LIMIT
        if zip64 and not self._allowZip64:
            raise LargeZipFile("Filesize would require ZIP64 extensions")
        self.fp.write(zinfo.FileHeader(zip64))
        self.fp.write(bytes)
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            fmt = '<LQQ' if zip64 else '<LLL'
            self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
                                      zinfo.file_size))
        self.fp.flush()
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Does nothing when the archive is already closed.  The underlying
        file is closed only if this object opened it, and that happens
        even when writing the ending records raises.
        """
        if self.fp is None:
            return

        try:
            if self.mode in ("w", "a") and self._didModify: # write ending records
                count = 0
                pos1 = self.fp.tell()
                for zinfo in self.filelist:         # write central directory
                    count = count + 1
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    extra = []
                    if zinfo.file_size > ZIP64_LIMIT \
                            or zinfo.compress_size > ZIP64_LIMIT:
                        # Real sizes go into the ZIP64 extra field; the
                        # 32-bit fields carry the all-ones marker.
                        extra.append(zinfo.file_size)
                        extra.append(zinfo.compress_size)
                        file_size = 0xffffffff
                        compress_size = 0xffffffff
                    else:
                        file_size = zinfo.file_size
                        compress_size = zinfo.compress_size

                    if zinfo.header_offset > ZIP64_LIMIT:
                        extra.append(zinfo.header_offset)
                        header_offset = 0xffffffff
                    else:
                        header_offset = zinfo.header_offset

                    extra_data = zinfo.extra
                    if extra:
                        # Append a ZIP64 field to the extra's
                        extra_data = struct.pack(
                            '<HH' + 'Q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data

                        extract_version = max(45, zinfo.extract_version)
                        create_version = max(45, zinfo.create_version)
                    else:
                        extract_version = zinfo.extract_version
                        create_version = zinfo.create_version

                    try:
                        filename, flag_bits = zinfo._encodeFilenameFlags()
                        centdir = struct.pack(structCentralDir,
                            stringCentralDir, create_version,
                            zinfo.create_system, extract_version, zinfo.reserved,
                            flag_bits, zinfo.compress_type, dostime, dosdate,
                            zinfo.CRC, compress_size, file_size,
                            len(filename), len(extra_data), len(zinfo.comment),
                            0, zinfo.internal_attr, zinfo.external_attr,
                            header_offset)
                    except DeprecationWarning:
                        # Dump the values that were being packed, then
                        # let the warning-as-exception propagate.
                        sys.stderr.write("%s\n" % ((structCentralDir,
                            stringCentralDir, create_version,
                            zinfo.create_system, extract_version, zinfo.reserved,
                            zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                            zinfo.CRC, compress_size, file_size,
                            len(zinfo.filename), len(extra_data), len(zinfo.comment),
                            0, zinfo.internal_attr, zinfo.external_attr,
                            header_offset),))
                        raise
                    self.fp.write(centdir)
                    self.fp.write(filename)
                    self.fp.write(extra_data)
                    self.fp.write(zinfo.comment)

                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                centDirCount = count
                centDirSize = pos2 - pos1
                centDirOffset = pos1
                if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                    # Need to write the ZIP64 end-of-archive records
                    zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, centDirCount, centDirCount,
                        centDirSize, centDirOffset)
                    self.fp.write(zip64endrec)

                    zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                    self.fp.write(zip64locrec)
                    # Clamp to what the classic end record can hold.
                    centDirCount = min(centDirCount, 0xFFFF)
                    centDirSize = min(centDirSize, 0xFFFFFFFF)
                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)

                endrec = struct.pack(structEndArchive, stringEndArchive,
                                     0, 0, centDirCount, centDirCount,
                                     centDirSize, centDirOffset, len(self._comment))
                self.fp.write(endrec)
                self.fp.write(self._comment)
                self.fp.flush()
        finally:
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001344
1345
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        basename, when non-empty, is prepended (joined with "/") to the
        archive name of every module that gets added.

        Raises RuntimeError if pathname is not a directory and does not
        end with ".py".
        """
        if not os.path.isdir(pathname):
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            self._write_module(pathname, basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if os.path.isfile(initname):
            # This is a package directory, add it.
            # Strip trailing separators before taking the last component:
            # os.path.split("pkg/") yields an empty name, which would
            # silently flatten the package into the archive root.
            separators = os.sep + (os.altsep or "")
            name = os.path.basename(pathname.rstrip(separators))
            if basename:
                basename = "%s/%s" % (basename, name)
            else:
                basename = name
            if self.debug:
                print("Adding package in %s as %s" % (pathname, basename))
            self._write_module(initname, basename)
            # List the directory only after __init__ has been handled, and
            # drop it from the listing so it is not added a second time.
            dirlist = os.listdir(pathname)
            dirlist.remove("__init__.py")
            # Add all *.py files and package subdirectories
            for filename in dirlist:
                path = os.path.join(pathname, filename)
                if os.path.isdir(path):
                    if os.path.isfile(os.path.join(path, "__init__.py")):
                        # This is a package directory, add it
                        self.writepy(path, basename)  # Recursive call
                elif os.path.splitext(filename)[1] == ".py":
                    self._write_module(path, basename)
        else:
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % pathname)
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] == ".py":
                    self._write_module(os.path.join(pathname, filename),
                                       basename)

    def _write_module(self, path_py, basename, verb="Adding"):
        """Archive the compiled form of the module source file path_py.

        path_py must end with ".py"; basename is the archive directory
        prefix passed on to _get_codename().  verb is only used for the
        debug message.
        """
        fname, arcname = self._get_codename(path_py[0:-3], basename)
        if self.debug:
            print("%s %s" % (verb, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).
        If basename is non-empty the archive name is "basename/<file>".
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        elif not os.path.isfile(file_pyc) or \
                os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
            # No .pyc yet, or it is older than the source: (re)compile.
            import py_compile
            if self.debug:
                print("Compiling %s" % file_py)
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError as err:
                # NOTE: best effort only -- the error is reported but
                # file_pyc is still returned, so it may be missing or stale.
                print(err.msg)
            fname = file_pyc
        else:
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001443
1444
def main(args=None):
    """Command-line interface: list, test, extract or create a zipfile.

    args is the argument list without the program name; when it is None,
    sys.argv[1:] is used.  The first argument selects the action:

        -l zipfile.zip            print a listing of the archive
        -t zipfile.zip            check the archive members for corruption
        -e zipfile.zip target     extract the archive into directory target
        -c zipfile.zip src ...    create the archive from files/directories

    On an unknown action or a wrong number of arguments the usage text is
    printed and sys.exit(1) is called.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)

    def usage_error():
        # Report a malformed invocation and terminate with status 1.
        print(USAGE)
        sys.exit(1)

    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_error()

    if args[0] == '-l':
        if len(args) != 2:
            usage_error()
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif args[0] == '-t':
        if len(args) != 2:
            usage_error()
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            usage_error()

        # Delegate to the library's own extraction instead of re-implementing
        # it here: the hand-written loop failed on directory entries (member
        # names ending in "/"), joined member names onto the target without
        # any normalisation, and read every member completely into memory.
        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif args[0] == '-c':
        if len(args) < 3:
            usage_error()

        def addToZip(zf, path, zippath):
            # Recursively add path to the archive under the name zippath.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w', allowZip64=True) as zf:
            for src in args[2:]:
                zippath = os.path.basename(src)
                if not zippath:
                    # "dir/" has an empty basename; use the directory's own
                    # name so its tree is not flattened into the root.
                    zippath = os.path.basename(os.path.dirname(src))
                addToZip(zf, src, zippath)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001514
# Run the command-line interface only when this file is executed as a
# script; importing the module must not trigger it.
if __name__ == "__main__":
    main()