blob: 9dc4f57489fed908dc251c24dbb180bd89f8135c [file] [log] [blame]
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001"""
2Read and write ZIP files.
3"""
Georg Brandl62416bc2008-01-07 18:47:44 +00004import struct, os, time, sys, shutil
Martin v. Löwis0dfcfc82009-01-24 14:00:33 +00005import binascii, cStringIO, stat
Antoine Pitrou94c33eb2010-01-27 20:59:50 +00006import io
7import re
Guido van Rossum32abe6f2000-03-31 17:30:02 +00008
9try:
Tim Peterse1190062001-01-15 03:34:38 +000010 import zlib # We may need its compression method
Gregory P. Smithb89a0962008-03-19 01:46:10 +000011 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000012except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000013 zlib = None
Gregory P. Smithb89a0962008-03-19 01:46:10 +000014 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000015
Skip Montanaro40fc1602001-03-01 04:27:19 +000016__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
Ronald Oussoren143cefb2006-06-15 08:14:18 +000017 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000018
class BadZipfile(Exception):
    """Error raised by this module for archives it cannot process.

    Examples visible in this module: an archive that spans multiple disks,
    or a member whose CRC-32 does not match the stored value.
    """
Ronald Oussoren143cefb2006-06-15 08:14:18 +000021
22
class LargeZipFile(Exception):
    """Signal that an archive needs ZIP64 extensions which are disabled.

    Raised while writing a zipfile when the data would require the ZIP64
    extensions but the caller has not enabled them.
    """
28
# Module-level alias: "error" is the exception raised by this module.
error = BadZipfile
Guido van Rossum32abe6f2000-03-31 17:30:02 +000030
# Size thresholds.  A member whose (compressed or uncompressed) size exceeds
# ZIP64_LIMIT needs the ZIP64 extension when its header is written.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

# Constants for ZIP file compression methods.
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods are not supported.
39
Martin v. Löwis8c436412008-07-03 12:51:14 +000040# Below are some formats and associated data for reading/writing headers using
41# the struct module. The names and structures of headers/records are those used
42# in the PKWARE description of the ZIP file format:
43# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
44# (URL valid as of January 2008)
Ronald Oussoren143cefb2006-06-15 08:14:18 +000045
Martin v. Löwis8c436412008-07-03 12:51:14 +000046# The "end of central directory" structure, magic number, size, and indices
47# (section V.I in the format document)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +000048structEndArchive = "<4s4H2LH"
49stringEndArchive = "PK\005\006"
50sizeEndCentDir = struct.calcsize(structEndArchive)
Martin v. Löwis8c436412008-07-03 12:51:14 +000051
52_ECD_SIGNATURE = 0
53_ECD_DISK_NUMBER = 1
54_ECD_DISK_START = 2
55_ECD_ENTRIES_THIS_DISK = 3
56_ECD_ENTRIES_TOTAL = 4
57_ECD_SIZE = 5
58_ECD_OFFSET = 6
59_ECD_COMMENT_SIZE = 7
60# These last two indices are not part of the structure as defined in the
61# spec, but they are used internally by this module as a convenience
62_ECD_COMMENT = 8
63_ECD_LOCATION = 9
64
65# The "central directory" structure, magic number, size, and indices
66# of entries in the structure (section V.F in the format document)
67structCentralDir = "<4s4B4HL2L5H2L"
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +000068stringCentralDir = "PK\001\002"
Martin v. Löwis8c436412008-07-03 12:51:14 +000069sizeCentralDir = struct.calcsize(structCentralDir)
70
Fred Drake3e038e52001-02-28 17:56:26 +000071# indexes of entries in the central directory structure
72_CD_SIGNATURE = 0
73_CD_CREATE_VERSION = 1
74_CD_CREATE_SYSTEM = 2
75_CD_EXTRACT_VERSION = 3
Martin v. Löwis8c436412008-07-03 12:51:14 +000076_CD_EXTRACT_SYSTEM = 4
Fred Drake3e038e52001-02-28 17:56:26 +000077_CD_FLAG_BITS = 5
78_CD_COMPRESS_TYPE = 6
79_CD_TIME = 7
80_CD_DATE = 8
81_CD_CRC = 9
82_CD_COMPRESSED_SIZE = 10
83_CD_UNCOMPRESSED_SIZE = 11
84_CD_FILENAME_LENGTH = 12
85_CD_EXTRA_FIELD_LENGTH = 13
86_CD_COMMENT_LENGTH = 14
87_CD_DISK_NUMBER_START = 15
88_CD_INTERNAL_FILE_ATTRIBUTES = 16
89_CD_EXTERNAL_FILE_ATTRIBUTES = 17
90_CD_LOCAL_HEADER_OFFSET = 18
91
Martin v. Löwis8c436412008-07-03 12:51:14 +000092# The "local file header" structure, magic number, size, and indices
93# (section V.A in the format document)
94structFileHeader = "<4s2B4HL2L2H"
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +000095stringFileHeader = "PK\003\004"
Martin v. Löwis8c436412008-07-03 12:51:14 +000096sizeFileHeader = struct.calcsize(structFileHeader)
97
Fred Drake3e038e52001-02-28 17:56:26 +000098_FH_SIGNATURE = 0
99_FH_EXTRACT_VERSION = 1
Martin v. Löwis8c436412008-07-03 12:51:14 +0000100_FH_EXTRACT_SYSTEM = 2
Fred Drake3e038e52001-02-28 17:56:26 +0000101_FH_GENERAL_PURPOSE_FLAG_BITS = 3
102_FH_COMPRESSION_METHOD = 4
103_FH_LAST_MOD_TIME = 5
104_FH_LAST_MOD_DATE = 6
105_FH_CRC = 7
106_FH_COMPRESSED_SIZE = 8
107_FH_UNCOMPRESSED_SIZE = 9
108_FH_FILENAME_LENGTH = 10
109_FH_EXTRA_FIELD_LENGTH = 11
110
Martin v. Löwis8c436412008-07-03 12:51:14 +0000111# The "Zip64 end of central directory locator" structure, magic number, and size
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000112structEndArchive64Locator = "<4sLQL"
113stringEndArchive64Locator = "PK\x06\x07"
114sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000115
116# The "Zip64 end of central directory" record, magic number, size, and indices
117# (section V.G in the format document)
Amaury Forgeot d'Arcae6d2b92008-07-11 21:28:25 +0000118structEndArchive64 = "<4sQ2H2L4Q"
119stringEndArchive64 = "PK\x06\x06"
120sizeEndCentDir64 = struct.calcsize(structEndArchive64)
Martin v. Löwis8c436412008-07-03 12:51:14 +0000121
122_CD64_SIGNATURE = 0
123_CD64_DIRECTORY_RECSIZE = 1
124_CD64_CREATE_VERSION = 2
125_CD64_EXTRACT_VERSION = 3
126_CD64_DISK_NUMBER = 4
127_CD64_DISK_NUMBER_START = 5
128_CD64_NUMBER_ENTRIES_THIS_DISK = 6
129_CD64_NUMBER_ENTRIES_TOTAL = 7
130_CD64_DIRECTORY_SIZE = 8
131_CD64_OFFSET_START_CENTDIR = 9
132
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    *fp* is a seekable file object.  An IOError raised while probing it is
    swallowed and reported as "not a ZIP file" (False).
    """
    try:
        # _EndRecData() yields a (truthy) list when the magic number is
        # present, None otherwise.
        found = bool(_EndRecData(fp))
    except IOError:
        found = False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000140
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path, or a file / file-like object (any
    object with a "read" attribute).  Paths are opened in binary mode and
    closed again before returning.  IOError is never propagated: it yields
    False (or whatever result was already obtained).
    """
    is_zip = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as fp:
                is_zip = _check_zipfile(fp)
        else:
            is_zip = _check_zipfile(fp=filename)
    except IOError:
        pass
    return is_zip
156
def _EndRecData64(fpin, offset, endrec):
    """Update *endrec* from the ZIP64 end-of-archive records, if present.

    fpin   -- seekable file object positioned anywhere.
    offset -- position of the classic end-of-central-directory record,
              expressed relative to the end of the file (so <= 0).
    endrec -- list built by _EndRecData(); modified in place and returned.

    If no valid ZIP64 locator/record precedes the classic record, *endrec*
    is returned unchanged.  Raises BadZipfile for multi-disk archives.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    locator = fpin.read(sizeEndCentDir64Locator)
    if len(locator) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, reloff, disks = struct.unpack(structEndArchive64Locator,
                                                 locator)
    if magic != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data': the record sits directly in front
    # of the locator.
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    record = fpin.read(sizeEndCentDir64)
    if len(record) != sizeEndCentDir64:
        return endrec
    fields = struct.unpack(structEndArchive64, record)
    if fields[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = fields[_CD64_SIGNATURE]
    endrec[_ECD_DISK_NUMBER] = fields[_CD64_DISK_NUMBER]
    endrec[_ECD_DISK_START] = fields[_CD64_DISK_NUMBER_START]
    endrec[_ECD_ENTRIES_THIS_DISK] = fields[_CD64_NUMBER_ENTRIES_THIS_DISK]
    endrec[_ECD_ENTRIES_TOTAL] = fields[_CD64_NUMBER_ENTRIES_TOTAL]
    endrec[_ECD_SIZE] = fields[_CD64_DIRECTORY_SIZE]
    endrec[_ECD_OFFSET] = fields[_CD64_OFFSET_START_CENTDIR]
    return endrec
198
199
def _EndRecData(fpin):
    """Locate and decode the "End of Central Directory" record of *fpin*.

    Returns None when no such record can be found.  Otherwise returns a list
    holding the unpacked fields of the record, followed by the archive
    comment and finally the file offset at which the record starts; the list
    is passed through _EndRecData64() so ZIP64 values override it if present.
    """
    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: a ZIP file with no archive comment has the "end of central
    # directory" structure as the very last item in the file.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    tail = fpin.read()
    if (len(tail) == sizeEndCentDir and
            tail[0:4] == stringEndArchive and
            tail[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, tail))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may
    # be up to 64K long.  It is assumed that the "end of central directory"
    # magic number does not appear in the comment.
    search_base = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(search_base, 0)
    window = fpin.read()
    sig_at = window.rfind(stringEndArchive)
    if sig_at < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    raw = window[sig_at:sig_at + sizeEndCentDir]
    if len(raw) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, raw))
    comment_len = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    comment_from = sig_at + sizeEndCentDir
    endrec.append(window[comment_from:comment_from + comment_len])
    endrec.append(search_base + sig_at)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, search_base + sig_at - filesize, endrec)
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000259
Fred Drake484d7352000-10-02 21:14:52 +0000260
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the description of one archive member.

        filename  -- member name; truncated at the first null byte and
                     normalised to use "/" as the directory separator.
        date_time -- (year, month, day, hour, min, sec) tuple.

        Raises ValueError if the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self, zip64=None):
        """Return the per-file header as a string.

        zip64 -- True to always emit the ZIP64 extra field, False to forbid
                 it, None (default) to emit it only when a size exceeds
                 ZIP64_LIMIT.

        Raises LargeZipFile when a size exceeds ZIP64_LIMIT but zip64 is
        false.  May raise self.extract_version / self.create_version to 45.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own previous value, so that a
            # create_version above 45 is never lowered.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        Unicode names are written as ASCII when possible; otherwise they are
        written as UTF-8 and flag bit 0x800 is set.  Byte-string names are
        passed through untouched.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Decode the extra field and apply any ZIP64 sizes/offset it holds.

        Walks the (type, length, payload) records in self.extra.  For the
        ZIP64 record (type 1), the 64-bit values replace file_size,
        compress_size and header_offset wherever those currently hold the
        0xffffffff placeholder.

        Raises RuntimeError for a ZIP64 record with an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # A record header is 4 bytes; stop on shorter trailing garbage
        # instead of failing inside struct.unpack.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Tim Petersa608bb22006-06-15 18:06:29 +0000417
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000418
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000419class _ZipDecrypter:
420 """Class to handle decryption of files stored within a ZIP archive.
421
422 ZIP supports a password-based form of encryption. Even though known
423 plaintext attacks have been found against it, it is still useful
Gregory P. Smithda407232008-01-20 01:32:00 +0000424 to be able to get data out of such a file.
Martin v. Löwisc6d626e2007-02-13 09:49:38 +0000425
426 Usage:
427 zd = _ZipDecrypter(mypwd)
428 plain_char = zd(cypher_char)
429 plain_text = map(zd, cypher_text)
430 """
431
432 def _GenerateCRCTable():
433 """Generate a CRC-32 table.
434
435 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
436 internal keys. We noticed that a direct implementation is faster than
437 relying on binascii.crc32().
438 """
439 poly = 0xedb88320
440 table = [0] * 256
441 for i in range(256):
442 crc = i
443 for j in range(8):
444 if crc & 1:
445 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
446 else:
447 crc = ((crc >> 1) & 0x7FFFFFFF)
448 table[i] = crc
449 return table
450 crctable = _GenerateCRCTable()
451
452 def _crc32(self, ch, crc):
453 """Compute the CRC32 primitive on one byte."""
454 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
455
456 def __init__(self, pwd):
457 self.key0 = 305419896
458 self.key1 = 591751049
459 self.key2 = 878082192
460 for p in pwd:
461 self._UpdateKeys(p)
462
463 def _UpdateKeys(self, c):
464 self.key0 = self._crc32(c, self.key0)
465 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
466 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
467 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
468
469 def __call__(self, c):
470 """Decrypt a single character."""
471 c = ord(c)
472 k = self.key2 | 2
473 c = c ^ (((k * (k^1)) >> 8) & 255)
474 c = chr(c)
475 self._UpdateKeys(c)
476 return c
477
Ezio Melotti9e949722012-11-18 13:18:06 +0200478
# Human-readable names for ZIP compression method ids; used to build the
# message of the NotImplementedError raised for unsupported methods.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
498
499
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000500class ZipExtFile(io.BufferedIOBase):
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000501 """File-like object for reading an archive member.
Tim Petersea5962f2007-03-12 18:07:52 +0000502 Is returned by ZipFile.open().
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000503 """
Tim Petersea5962f2007-03-12 18:07:52 +0000504
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000505 # Max size supported by decompressor.
506 MAX_N = 1 << 31 - 1
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000507
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000508 # Read from compressed files in 4k blocks.
509 MIN_READ_SIZE = 4096
Tim Petersea5962f2007-03-12 18:07:52 +0000510
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000511 # Search for universal newlines or line chunks.
512 PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')
513
Jesus Cea93d628b2012-11-04 02:32:08 +0100514 def __init__(self, fileobj, mode, zipinfo, decrypter=None,
515 close_fileobj=False):
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000516 self._fileobj = fileobj
517 self._decrypter = decrypter
Jesus Cea93d628b2012-11-04 02:32:08 +0100518 self._close_fileobj = close_fileobj
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000519
Ezio Melotti4611b052010-01-28 01:41:30 +0000520 self._compress_type = zipinfo.compress_type
521 self._compress_size = zipinfo.compress_size
522 self._compress_left = zipinfo.compress_size
523
524 if self._compress_type == ZIP_DEFLATED:
525 self._decompressor = zlib.decompressobj(-15)
Ezio Melotti9e949722012-11-18 13:18:06 +0200526 elif self._compress_type != ZIP_STORED:
527 descr = compressor_names.get(self._compress_type)
528 if descr:
529 raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr))
530 else:
531 raise NotImplementedError("compression type %d" % (self._compress_type,))
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000532 self._unconsumed = ''
533
534 self._readbuffer = ''
535 self._offset = 0
536
537 self._universal = 'U' in mode
538 self.newlines = None
539
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000540 # Adjust read size for encrypted files since the first 12 bytes
541 # are for the encryption/password information.
542 if self._decrypter is not None:
543 self._compress_left -= 12
544
545 self.mode = mode
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000546 self.name = zipinfo.filename
547
Antoine Pitroue1436d12010-08-12 15:25:51 +0000548 if hasattr(zipinfo, 'CRC'):
549 self._expected_crc = zipinfo.CRC
550 self._running_crc = crc32(b'') & 0xffffffff
551 else:
552 self._expected_crc = None
553
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000554 def readline(self, limit=-1):
555 """Read and return a line from the stream.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000556
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000557 If limit is specified, at most limit bytes will be read.
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000558 """
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000559
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000560 if not self._universal and limit < 0:
561 # Shortcut common case - newline found in buffer.
562 i = self._readbuffer.find('\n', self._offset) + 1
563 if i > 0:
564 line = self._readbuffer[self._offset: i]
565 self._offset = i
566 return line
Tim Petersea5962f2007-03-12 18:07:52 +0000567
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000568 if not self._universal:
569 return io.BufferedIOBase.readline(self, limit)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000570
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000571 line = ''
572 while limit < 0 or len(line) < limit:
573 readahead = self.peek(2)
574 if readahead == '':
575 return line
Tim Petersea5962f2007-03-12 18:07:52 +0000576
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000577 #
578 # Search for universal newlines or line chunks.
579 #
580 # The pattern returns either a line chunk or a newline, but not
581 # both. Combined with peek(2), we are assured that the sequence
582 # '\r\n' is always retrieved completely and never split into
583 # separate newlines - '\r', '\n' due to coincidental readaheads.
584 #
585 match = self.PATTERN.search(readahead)
586 newline = match.group('newline')
587 if newline is not None:
588 if self.newlines is None:
589 self.newlines = []
590 if newline not in self.newlines:
591 self.newlines.append(newline)
592 self._offset += len(newline)
593 return line + '\n'
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000594
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000595 chunk = match.group('chunk')
596 if limit >= 0:
597 chunk = chunk[: limit - len(line)]
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000598
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000599 self._offset += len(chunk)
600 line += chunk
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000601
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000602 return line
603
604 def peek(self, n=1):
605 """Returns buffered bytes without advancing the position."""
606 if n > len(self._readbuffer) - self._offset:
607 chunk = self.read(n)
608 self._offset -= len(chunk)
609
610 # Return up to 512 bytes to reduce allocation overhead for tight loops.
611 return self._readbuffer[self._offset: self._offset + 512]
612
613 def readable(self):
614 return True
615
616 def read(self, n=-1):
617 """Read and return up to n bytes.
618 If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000619 """
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000620 buf = ''
Antoine Pitroue4195e82010-09-12 14:56:27 +0000621 if n is None:
622 n = -1
623 while True:
624 if n < 0:
625 data = self.read1(n)
626 elif n > len(buf):
627 data = self.read1(n - len(buf))
628 else:
629 return buf
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000630 if len(data) == 0:
631 return buf
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000632 buf += data
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000633
Antoine Pitroue1436d12010-08-12 15:25:51 +0000634 def _update_crc(self, newdata, eof):
635 # Update the CRC using the given data.
636 if self._expected_crc is None:
637 # No need to compute the CRC if we don't have a reference value
638 return
639 self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
640 # Check the CRC if we're at the end of the file
641 if eof and self._running_crc != self._expected_crc:
642 raise BadZipfile("Bad CRC-32 for file %r" % self.name)
643
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000644 def read1(self, n):
645 """Read up to n bytes with at most one read() system call."""
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000646
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000647 # Simplify algorithm (branching) by transforming negative n to large n.
648 if n < 0 or n is None:
649 n = self.MAX_N
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000650
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000651 # Bytes available in read buffer.
652 len_readbuffer = len(self._readbuffer) - self._offset
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000653
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000654 # Read from file.
655 if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
656 nbytes = n - len_readbuffer - len(self._unconsumed)
657 nbytes = max(nbytes, self.MIN_READ_SIZE)
658 nbytes = min(nbytes, self._compress_left)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000659
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000660 data = self._fileobj.read(nbytes)
661 self._compress_left -= len(data)
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000662
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000663 if data and self._decrypter is not None:
664 data = ''.join(map(self._decrypter, data))
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000665
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000666 if self._compress_type == ZIP_STORED:
Antoine Pitroue1436d12010-08-12 15:25:51 +0000667 self._update_crc(data, eof=(self._compress_left==0))
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000668 self._readbuffer = self._readbuffer[self._offset:] + data
669 self._offset = 0
670 else:
671 # Prepare deflated bytes for decompression.
672 self._unconsumed += data
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000673
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000674 # Handle unconsumed data.
Ezio Melotti4611b052010-01-28 01:41:30 +0000675 if (len(self._unconsumed) > 0 and n > len_readbuffer and
676 self._compress_type == ZIP_DEFLATED):
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000677 data = self._decompressor.decompress(
678 self._unconsumed,
679 max(n - len_readbuffer, self.MIN_READ_SIZE)
680 )
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000681
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000682 self._unconsumed = self._decompressor.unconsumed_tail
Antoine Pitroue1436d12010-08-12 15:25:51 +0000683 eof = len(self._unconsumed) == 0 and self._compress_left == 0
684 if eof:
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000685 data += self._decompressor.flush()
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000686
Antoine Pitroue1436d12010-08-12 15:25:51 +0000687 self._update_crc(data, eof=eof)
Antoine Pitrou94c33eb2010-01-27 20:59:50 +0000688 self._readbuffer = self._readbuffer[self._offset:] + data
689 self._offset = 0
690
691 # Read from buffer.
692 data = self._readbuffer[self._offset: self._offset + n]
693 self._offset += len(data)
694 return data
695
def close(self):
    """Close this member stream.

    The underlying file object is closed only when ``_close_fileobj`` is
    true.  The base-class ``close()`` is always invoked, even if closing
    the underlying file object raises.
    """
    try:
        owns_fileobj = self._close_fileobj
        if owns_fileobj:
            underlying = self._fileobj
            underlying.close()
    finally:
        # Run the base-class close() no matter what happened above.
        super(ZipExtFile, self).close()
Tim Petersea5962f2007-03-12 18:07:52 +0000702
Martin v. Löwis3eb76482007-03-06 10:41:24 +0000703
R David Murray3f4ccba2012-04-12 18:42:47 -0400704class ZipFile(object):
Tim Petersa19a1682001-03-29 04:36:09 +0000705 """ Class with methods to open, read, write, close, list zip files.
706
Martin v. Löwis8c436412008-07-03 12:51:14 +0000707 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000708
Fred Drake3d9091e2001-03-26 15:49:24 +0000709 file: Either the path to the file, or a file-like object.
710 If it is a path, the file will be opened and closed by ZipFile.
711 mode: The mode can be either read "r", write "w" or append "a".
712 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000713 allowZip64: if True ZipFile will create files with ZIP64 extensions when
714 needed, otherwise it will raise an exception when this would
715 be necessary.
716
Fred Drake3d9091e2001-03-26 15:49:24 +0000717 """
Fred Drake484d7352000-10-02 21:14:52 +0000718
Fred Drake90eac282001-02-28 05:29:34 +0000719 fp = None # Set here since __del__ checks it
720
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file: either a path (opened and later closed by this object) or a
          file-like object (used as-is and never closed here).
    mode: "r", "w" or "a"; anything else raises RuntimeError.
    compression: ZIP_STORED or ZIP_DEFLATED (the latter requires zlib);
          any other value raises RuntimeError.
    allowZip64: stored on the instance and consulted when writing.

    If reading the table of contents fails, a file opened here is closed
    again before the exception propagates.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression != ZIP_STORED:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                  # Level of printing: 0 through 3
    self.NameToInfo = {}            # Find file info given name
    self.filelist = []              # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    # 'key' selects the set-up path below.  It differs from self.mode only
    # when an append falls back to creating a new file.
    self.mode = key = mode
    self.pwd = None
    self._comment = ''

    # Check if we were passed a file-like object
    if isinstance(file, basestring):
        self._filePassed = 0
        self.filename = file
        modeDict = {'r': 'rb', 'w': 'wb', 'a': 'r+b'}
        try:
            self.fp = open(file, modeDict[mode])
        except IOError:
            if mode != 'a':
                raise
            # Appending to a file that cannot be opened for update:
            # create it instead and treat it as a fresh archive.
            key = 'w'
            self.fp = open(file, modeDict[key])
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    try:
        if key == 'r':
            self._RealGetContents()
        elif key == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        else:
            # key == 'a' (mode was validated above, so nothing else
            # can reach this branch)
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipfile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
    except:
        # Deliberately bare: the handle must be released for *any*
        # exception before it is re-raised unchanged.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000791
def __enter__(self):
    """Enter a ``with`` block; the archive itself is the context value."""
    archive = self
    return archive
794
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    The exception details are ignored and None is returned, so an
    exception raised inside the ``with`` body is not suppressed.
    """
    self.close()
    return None
797
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record via _EndRecData(), then
    parses every central directory entry into a ZipInfo that is appended
    to self.filelist and indexed in self.NameToInfo.  Also sets
    self._comment and self.start_dir.

    Raises BadZipfile if no end record is found, if the computed central
    directory offset is negative, or if a directory entry is truncated
    or has a bad signature.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except IOError:
        raise BadZipfile("File is not a zip file")
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset %s %s %s"
              % (offset_cd, inferred, concat))
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    if self.start_dir < 0:
        # A directory size larger than the end record's position can only
        # come from a corrupt archive; report it as such instead of
        # letting seek() fail with an unrelated error type.
        raise BadZipfile("Bad offset for central directory")
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # From here on, parse from an in-memory copy of the directory.
    fp = cStringIO.StringIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if len(centdir) != sizeCentralDir:
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Create ZipInfo instance to store file information
        zinfo = ZipInfo(filename)
        zinfo.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        zinfo.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (zinfo.create_version, zinfo.create_system, zinfo.extract_version,
         zinfo.reserved, zinfo.flag_bits, zinfo.compress_type,
         dos_time, dos_date,
         zinfo.CRC, zinfo.compress_size, zinfo.file_size) = centdir[1:12]
        zinfo.volume, zinfo.internal_attr, zinfo.external_attr = \
            centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        zinfo._raw_time = dos_time
        zinfo.date_time = ((dos_date >> 9) + 1980,
                           (dos_date >> 5) & 0xF,
                           dos_date & 0x1F,
                           dos_time >> 11,
                           (dos_time >> 5) & 0x3F,
                           (dos_time & 0x1F) * 2)

        zinfo._decodeExtra()
        zinfo.header_offset = zinfo.header_offset + concat
        zinfo.filename = zinfo._decodeFilename()
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total %s" % total)
Ronald Oussoren143cefb2006-06-15 08:14:18 +0000865
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000866
def namelist(self):
    """Return a new list holding the file name of every archive member,
    in the order the members appear in self.filelist."""
    return [entry.filename for entry in self.filelist]
873
def infolist(self):
    """Return the list of ZipInfo instances for the archive members.

    This is the archive's own ``filelist`` object, not a copy.
    """
    entries = self.filelist
    return entries
878
def printdir(self):
    """Print a table of contents for the zip file.

    Writes one header line followed by one line per member giving its
    name, modification timestamp and uncompressed size.
    """
    heading = ("File Name", "Modified ", "Size")
    print("%-46s %19s %12s" % heading)
    row_layout = "%-46s %s %12d"
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        print(row_layout % (entry.filename, stamp, entry.file_size))
885
886 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000887 """Read all the files and check the CRC."""
Antoine Pitrouc5342702008-08-17 13:06:29 +0000888 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000889 for zinfo in self.filelist:
890 try:
Antoine Pitrouc5342702008-08-17 13:06:29 +0000891 # Read by chunks, to avoid an OverflowError or a
892 # MemoryError with very large embedded files.
Antoine Pitrou02512fb2012-11-17 23:56:53 +0100893 with self.open(zinfo.filename, "r") as f:
894 while f.read(chunk_size): # Check CRC-32
895 pass
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000896 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000897 return zinfo.filename
898
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError if the archive has no member called 'name'.
    """
    entry = self.NameToInfo.get(name)
    if entry is not None:
        return entry
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000907
def setpassword(self, pwd):
    """Set default password for encrypted files.

    The value is stored in ``self.pwd`` without any validation.
    """
    default_password = pwd
    self.pwd = default_password
911
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    """Set the archive comment and mark the archive as modified.

    A comment longer than ZIP_MAX_COMMENT is truncated to that length;
    when self.debug is set a notice is printed first.  A comment of
    exactly ZIP_MAX_COMMENT fits and is stored unchanged, silently.
    """
    # check for valid comment length
    if len(comment) > ZIP_MAX_COMMENT:
        if self.debug:
            print('Archive comment is too long; truncating to %d bytes'
                  % ZIP_MAX_COMMENT)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    self._didModify = True
927
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    'name' and 'pwd' are passed straight to self.open().  The member
    stream is closed before returning, so any file handle that open()
    created for it is released promptly instead of waiting for garbage
    collection.
    """
    with self.open(name, "r", pwd) as fp:
        return fp.read()
931
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name: a member name or a ZipInfo instance.
    mode: "r", "U" or "rU"; anything else raises RuntimeError.
    pwd:  password for an encrypted member; falls back to self.pwd.

    Raises RuntimeError if the archive is already closed, if an
    encrypted member has no password, or if the password check byte
    does not match.  Raises BadZipfile if the local file header or the
    encryption header is truncated, has a bad signature, or names a
    different file than the central directory.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
        should_close = False
    else:
        zef_file = open(self.filename, 'rb')
        should_close = True

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipfile("Truncated file header")
        fheader = struct.unpack(structFileHeader, fheader)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            crypt_header = zef_file.read(12)
            if len(crypt_header) != 12:
                # Without this check a short read surfaces as an
                # IndexError on the check byte below.
                raise BadZipfile("Truncated encryption header")
            # Every byte must pass through the decrypter in order, since
            # each call advances its internal state.
            h = [zd(c) for c in crypt_header]
            if zinfo.flag_bits & 0x8:
                # compare against the file time from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd,
                          close_fileobj=should_close)
    except:
        # Deliberately bare: a handle opened above must be released for
        # any exception before it is re-raised unchanged.
        if should_close:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001010
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the path it was written to.

    'member' may be a member name or a ZipInfo object; a name is looked
    up with getinfo().  'path' is the directory to extract into and
    defaults to the current working directory.  'pwd' is forwarded to
    _extract_member() unchanged.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
1024
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() on each in turn.

    'members' defaults to everything returned by namelist(); 'path' and
    'pwd' are forwarded to extract() unchanged.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
1036
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

    The member name comes from the archive and is therefore untrusted:
    it is treated as relative to 'targetpath', with any drive prefix,
    leading separators, empty components, "." and ".." removed, so the
    result cannot land outside 'targetpath'.  Missing parent directories
    are created.  A member whose name ends in "/" is created as a
    directory.  Returns the normalised path that was written.
    """
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # Build the destination pathname, replacing forward slashes (and the
    # platform's alternative separator) with the platform separator.
    arcname = member.filename.replace('/', os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # Interpret an absolute pathname as relative: drop the drive letter
    # or UNC prefix, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    arcname = os.path.sep.join(
        part for part in arcname.split(os.path.sep)
        if part not in ('', os.path.curdir, os.path.pardir))

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    # Slice rather than index so an empty member name cannot raise.
    if member.filename[-1:] == '/':
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    with self.open(member, pwd=pwd) as source, \
            open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1071
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Prints a notice for a duplicate member name when self.debug is set.
    Raises RuntimeError if the archive is not open for writing or the
    compression method is unusable, and LargeZipFile if the member's
    size or header offset exceeds ZIP64_LIMIT while ZIP64 is disallowed.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("Duplicate name: %s" % (zinfo.filename,))
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    if zinfo.compress_type == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError(
            "That compression method is not supported")
    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001094
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    'arcname' defaults to 'filename'; its drive prefix and leading
    separators are removed.  'compress_type' defaults to the archive's
    own compression setting.  A directory is stored as an empty entry
    whose name ends in "/".  Raises RuntimeError if the archive is
    already closed.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    date_time = time.localtime(st.st_mtime)[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16    # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        return

    with open(filename, "rb") as source:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = running_crc = 0
        zinfo.compress_size = packed_size = 0
        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
            zinfo.file_size * 1.05 > ZIP64_LIMIT
        self.fp.write(zinfo.FileHeader(zip64))
        compressor = None
        if zinfo.compress_type == ZIP_DEFLATED:
            compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                          zlib.DEFLATED, -15)
        raw_size = 0
        # Stream the file in 8 KiB pieces until read() returns nothing.
        for chunk in iter(lambda: source.read(1024 * 8), b''):
            raw_size += len(chunk)
            running_crc = crc32(chunk, running_crc) & 0xffffffff
            if compressor is not None:
                chunk = compressor.compress(chunk)
                packed_size += len(chunk)
            self.fp.write(chunk)
    if compressor is not None:
        tail = compressor.flush()
        packed_size += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed_size
    else:
        zinfo.compress_size = raw_size
    zinfo.CRC = running_crc
    zinfo.file_size = raw_size
    if not zip64 and self._allowZip64:
        # The header already written has no room for ZIP64 sizes.
        if raw_size > ZIP64_LIMIT:
            raise RuntimeError('File size has increased during compressing')
        if packed_size > ZIP64_LIMIT:
            raise RuntimeError('Compressed size larger than uncompressed size')
    # Seek backwards and write file header (which will now include
    # correct CRC and file sizes)
    resume_at = self.fp.tell()      # Preserve current position in file
    self.fp.seek(zinfo.header_offset, 0)
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.seek(resume_at, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1183
def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo is created with the current
    local time, the archive's compression setting and 0600 permission
    bits.  An explicit 'compress_type' overrides the entry's compression
    either way.  Raises RuntimeError if the archive is already closed
    and LargeZipFile if ZIP64 would be needed but is not allowed.
    """
    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])

        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    if compress_type is not None:
        zinfo.compress_type = compress_type

    zinfo.file_size = len(bytes)            # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(bytes) & 0xffffffff   # CRC-32 checksum
    payload = bytes
    if zinfo.compress_type == ZIP_DEFLATED:
        deflater = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        payload = deflater.compress(payload) + deflater.flush()
        zinfo.compress_size = len(payload)  # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    zip64 = (zinfo.file_size > ZIP64_LIMIT or
             zinfo.compress_size > ZIP64_LIMIT)
    if zip64 and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.write(payload)
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        descriptor_format = '<LQQ' if zip64 else '<LLL'
        self.fp.write(struct.pack(descriptor_format, zinfo.CRC,
                                  zinfo.compress_size, zinfo.file_size))
    self.fp.flush()
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1230
def __del__(self):
    """Finaliser: call close() in case the user forgot to."""
    closer = self.close
    closer()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001234
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing if the archive is already closed.  Otherwise, when the
    archive is writable and was modified, the central directory and the
    end-of-archive record(s) are written.  In every case self.fp is
    reset to None afterwards, and the file is closed if this object
    opened it.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:
            # write ending records
            count = 0
            pos1 = self.fp.tell()
            # write central directory
            for count, zinfo in enumerate(self.filelist, 1):
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)

                # Values too large for the fixed 32-bit fields are moved
                # into a ZIP64 extra field and replaced by 0xffffffff.
                zip64_values = []
                if (zinfo.file_size > ZIP64_LIMIT
                        or zinfo.compress_size > ZIP64_LIMIT):
                    zip64_values.append(zinfo.file_size)
                    zip64_values.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    zip64_values.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if zip64_values:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q' * len(zip64_values),
                        1, 8 * len(zip64_values), *zip64_values) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(
                        structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the values that were being packed, then let
                    # the warning-turned-exception propagate.
                    dump = (structCentralDir,
                            stringCentralDir, create_version,
                            zinfo.create_system, extract_version,
                            zinfo.reserved,
                            zinfo.flag_bits, zinfo.compress_type,
                            dostime, dosdate,
                            zinfo.CRC, compress_size, file_size,
                            len(zinfo.filename), len(extra_data),
                            len(zinfo.comment),
                            0, zinfo.internal_attr, zinfo.external_attr,
                            header_offset)
                    sys.stderr.write("%s\n" % (dump,))
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            needs_zip64 = (centDirCount >= ZIP_FILECOUNT_LIMIT or
                           centDirOffset > ZIP64_LIMIT or
                           centDirSize > ZIP64_LIMIT)
            if needs_zip64:
                # Need to write the ZIP64 end-of-archive records
                self.fp.write(struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset))

                self.fp.write(struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1))
                # Clamp the values that go into the classic end record.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset,
                                 len(self._comment))
            self.fp.write(endrec)
            self.fp.write(self._comment)
            self.fp.flush()
    finally:
        # Detach the handle first so the archive counts as closed even
        # if closing the file itself fails.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001339
1340
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        basename is the archive directory prefix the modules are stored
        under; an empty string stores them at the top level.

        Raises RuntimeError when pathname is not a directory and does
        not end with ".py".
        """
        if not os.path.isdir(pathname):
            # A single module: only *.py source paths are accepted.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            self._write_module(pathname[0:-3], basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % pathname)
            for filename in os.listdir(pathname):
                if os.path.splitext(filename)[1] == ".py":
                    path = os.path.join(pathname, filename)
                    self._write_module(path[0:-3], basename, "Adding")
            return

        # This is a package directory: its modules are stored under
        # "<basename>/<directory name>".
        name = os.path.split(pathname)[1]
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in %s as %s" % (pathname, basename))
        self._write_module(initname[0:-3], basename, "Adding")

        dirlist = os.listdir(pathname)
        # __init__.py has just been added; do not add it a second time.
        dirlist.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            if os.path.isdir(path):
                # Only sub-packages are followed; directories without an
                # __init__.py are ignored.
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    self.writepy(path, basename)  # Recursive call
            elif os.path.splitext(filename)[1] == ".py":
                self._write_module(path[0:-3], basename, "Adding")

    def _write_module(self, module_path, basename, label):
        """Archive the compiled form of one module.

        module_path is the source path without its ".py" suffix.  label
        is the text printed in front of the archive name when self.debug
        is set.
        """
        fname, arcname = self._get_codename(module_path, basename)
        if self.debug:
            print("%s %s" % (label, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).
        When basename is not empty the archive name is prefixed with
        "<basename>/".
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        elif not os.path.isfile(file_pyc) or \
                os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
            # The .pyc is missing or older than the source: rebuild it.
            import py_compile
            if self.debug:
                print("Compiling %s" % file_py)
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError as err:
                # The failure is only reported; file_pyc is still
                # returned, whether or not such a file exists.
                print(err.msg)
            fname = file_pyc
        else:
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001438
1439
def main(args = None):
    """Run the command-line interface of this module.

    args is the argument list without the program name; when it is
    None, sys.argv[1:] is used.  The first argument selects the action:

        -l zipfile.zip          print a listing of the archive
        -t zipfile.zip          test the archive and report a corrupt member
        -e zipfile.zip target   extract the archive into directory target
        -c zipfile.zip src ...  create the archive from files/directories

    The usage text is printed and the process exits with status 1 when
    the action is unknown or the number of arguments does not fit it.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    def usage_error():
        # Shared exit path for every malformed command line.
        print(USAGE)
        sys.exit(1)

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_error()

    if args[0] == '-l':
        if len(args) != 2:
            usage_error()
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif args[0] == '-t':
        if len(args) != 2:
            usage_error()
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            usage_error()

        # Let the library do the extraction instead of joining member
        # names onto the target by hand: a hand-built os.path.join()
        # lets an absolute member name replace the target directory, and
        # opening a directory entry (name ending in "/") for writing
        # fails.
        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif args[0] == '-c':
        if len(args) < 3:
            usage_error()

        def addToZip(zf, path, zippath):
            # Files are stored deflated; directories are walked
            # recursively, extending the archive path alongside.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w', allowZip64=True) as zf:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
Ronald Oussoren143cefb2006-06-15 08:14:18 +00001509
# Run the command-line interface only when this file is executed as a
# script; importing the module does not call main().
if __name__ == "__main__":
    main()