blob: 50f484873ad85990fe76a624fded11a269e99392 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import imp
10import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
18try:
Tim Peterse1190062001-01-15 03:34:38 +000019 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000020 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000021except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000022 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000023 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024
# Names exported by "from zipfile import *".
__all__ = [
    "BadZipFile", "BadZipfile", "error",
    "ZIP_STORED", "ZIP_DEFLATED",
    "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
]
Skip Montanaro40fc1602001-03-01 04:27:19 +000027
class BadZipFile(Exception):
    """Error raised by this module for ZIP data it cannot handle.

    In this file it is raised for archives spanning multiple disks and for
    members whose CRC-32 does not match.
    """


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


# Pre-3.2 compatibility names
BadZipfile = BadZipFile
error = BadZipFile
39
Guido van Rossum32abe6f2000-03-31 17:30:02 +000040
# Size limits used by this module.
ZIP64_LIMIT = 0x7FFFFFFF            # (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 0x10000       # 1 << 16
ZIP_MAX_COMMENT = 0xFFFF            # (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields in an unpacked "end of central directory" record.
# The last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience.
(
    _ECD_SIGNATURE,
    _ECD_DISK_NUMBER,
    _ECD_DISK_START,
    _ECD_ENTRIES_THIS_DISK,
    _ECD_ENTRIES_TOTAL,
    _ECD_SIZE,
    _ECD_OFFSET,
    _ECD_COMMENT_SIZE,
    _ECD_COMMENT,
    _ECD_LOCATION,
) = range(10)

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(
    _CD_SIGNATURE,
    _CD_CREATE_VERSION,
    _CD_CREATE_SYSTEM,
    _CD_EXTRACT_VERSION,
    _CD_EXTRACT_SYSTEM,
    _CD_FLAG_BITS,
    _CD_COMPRESS_TYPE,
    _CD_TIME,
    _CD_DATE,
    _CD_CRC,
    _CD_COMPRESSED_SIZE,
    _CD_UNCOMPRESSED_SIZE,
    _CD_FILENAME_LENGTH,
    _CD_EXTRA_FIELD_LENGTH,
    _CD_COMMENT_LENGTH,
    _CD_DISK_NUMBER_START,
    _CD_INTERNAL_FILE_ATTRIBUTES,
    _CD_EXTERNAL_FILE_ATTRIBUTES,
    _CD_LOCAL_HEADER_OFFSET,
) = range(19)

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

(
    _FH_SIGNATURE,
    _FH_EXTRACT_VERSION,
    _FH_EXTRACT_SYSTEM,
    _FH_GENERAL_PURPOSE_FLAG_BITS,
    _FH_COMPRESSION_METHOD,
    _FH_LAST_MOD_TIME,
    _FH_LAST_MOD_DATE,
    _FH_CRC,
    _FH_COMPRESSED_SIZE,
    _FH_UNCOMPRESSED_SIZE,
    _FH_FILENAME_LENGTH,
    _FH_EXTRA_FIELD_LENGTH,
) = range(12)

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

(
    _CD64_SIGNATURE,
    _CD64_DIRECTORY_RECSIZE,
    _CD64_CREATE_VERSION,
    _CD64_EXTRACT_VERSION,
    _CD64_DISK_NUMBER,
    _CD64_DISK_NUMBER_START,
    _CD64_NUMBER_ENTRIES_THIS_DISK,
    _CD64_NUMBER_ENTRIES_TOTAL,
    _CD64_DIRECTORY_SIZE,
    _CD64_OFFSET_START_CENTDIR,
) = range(10)
142
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    *fp* is an open file object that is handed to _EndRecData().  An IOError
    raised while probing it is treated as "not a ZIP file".
    """
    try:
        # A truthy record means the file has the correct magic number.
        found = bool(_EndRecData(fp))
    except IOError:
        return False
    return found
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000150
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too: anything
    with a "read" attribute is probed directly, anything else is opened in
    binary mode as a path.  Returns False if opening or probing the file
    raises IOError.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except IOError:
        pass
    return verdict
166
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin is a seekable binary file object.  offset is the position of the
    classic "end of central directory" record relative to the end of the
    file (callers pass a negative value).  endrec is the list built by
    _EndRecData(); it is modified in place.

    Returns endrec.  It is returned unchanged when the file is too small to
    hold the ZIP64 records or when their signatures are not found.

    Raises BadZipFile if the locator describes an archive that spans
    multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator in front of the classic
        # record, so struct.unpack() must not be attempted.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated ZIP64 record: keep the values from the classic record.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
204
205
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    fpin is a seekable binary file object.

    The result is a list holding the unpacked fields of the ZIP "End of
    central dir" record, followed by the archive comment (bytes) and then
    the file offset of the record itself (see the _ECD_* indices).  The list
    is passed through _EndRecData64() so ZIP64 values override the classic
    ones when present.  None is returned when no valid record is found."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check matters for file objects that clamp a negative seek
    # instead of raising: they hand back fewer bytes than the record needs,
    # which struct.unpack() would reject with struct.error.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record to follow it.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000262
Fred Drake484d7352000-10-02 21:14:52 +0000263
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for *filename* with default attribute values.

        filename is the member name (str); it is cut at the first null
        character and os.sep is replaced by "/".  date_time is a
        (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as bytes.

        The result is the packed local file header followed by the encoded
        file name and the extra field.  When either size exceeds ZIP64_LIMIT
        a ZIP64 extra record is appended and extract_version/create_version
        are raised to at least 45 on this object.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value so that a higher
            # create_version is not overwritten by extract_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        The name is encoded as ASCII when possible; otherwise it is encoded
        as UTF-8 and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply ZIP64 values found in the extra field to this entry.

        Walks the (type, length, data) records in self.extra.  For a record
        of type 1, the 64-bit values replace file_size, compress_size and
        header_offset, in that order, for each of those attributes that
        currently holds the 0xffffffff placeholder.

        Raises RuntimeError if a type 1 record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # A record header needs 4 bytes; anything shorter left at the end is
        # padding and is ignored rather than fed to struct.unpack().
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000402
403
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        polynomial = 0xedb88320
        entries = []
        for value in range(256):
            # Eight shift/xor rounds turn the byte value into its table entry.
            for _ in range(8):
                shifted = (value >> 1) & 0x7FFFFFFF
                value = shifted ^ polynomial if value & 1 else shifted
            entries.append(value)
        return entries
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ch) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        """Initialise the three keys and mix in each item of *pwd*."""
        self.key0, self.key1, self.key2 = 0x12345678, 0x23456789, 0x34567890
        for item in pwd:
            self._UpdateKeys(item)

    def _UpdateKeys(self, c):
        """Advance the three keys using the integer value *c*."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        summed = (self.key1 + (self.key0 & 0xFF)) & mask32
        self.key1 = (summed * 0x08088405 + 1) & mask32
        self.key2 = self._crc32((self.key1 >> 24) & 0xFF, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        keystream = ((k * (k ^ 1)) >> 8) & 255
        plain = c ^ keystream
        # The keys are updated with the decrypted value, not the input.
        self._UpdateKeys(plain)
        return plain
461
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses are required:
    # "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Wrap *fileobj*, positioned at the start of a member's data.

        fileobj:       binary file object the member data is read from.
        mode:          mode string; a 'U' in it enables universal newlines
                       in readline().
        zipinfo:       object providing compress_type, compress_size,
                       filename and, optionally, CRC.
        decrypter:     optional callable applied to every byte read.
        close_fileobj: if true, close() also closes fileobj.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        self._unconsumed = b''

        self._readbuffer = b''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            # Fill the buffer through read(), then step back over what it
            # consumed so the position is unchanged.
            chunk = self.read(n)
            self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: this object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        buf = b''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold *newdata* into the running CRC and verify it when *eof*.

        Raises BadZipFile if eof is true and the CRC does not match the
        expected value.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call.

        If n is None or negative, up to MAX_N bytes are requested.
        """

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None must be tested first: "None < 0" raises TypeError.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = bytes(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data

    def close(self):
        """Close this object, and the underlying file if it was requested."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000658
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000659
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000660class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000661 """ Class with methods to open, read, write, close, list zip files.
662
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000663 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000664
Fred Drake3d9091e2001-03-26 15:49:24 +0000665 file: Either the path to the file, or a file-like object.
666 If it is a path, the file will be opened and closed by ZipFile.
667 mode: The mode can be either read "r", write "w" or append "a".
668 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000669 allowZip64: if True ZipFile will create files with ZIP64 extensions when
670 needed, otherwise it will raise an exception when this would
671 be necessary.
672
Fred Drake3d9091e2001-03-26 15:49:24 +0000673 """
Fred Drake484d7352000-10-02 21:14:52 +0000674
Fred Drake90eac282001-02-28 05:29:34 +0000675 fp = None # Set here since __del__ checks it
676
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file: a path (str), which is opened here, or a file-like object,
        which is used as given.
    mode: "r", "w" or "a".  When "a" is requested for a path that cannot
        be opened for update, the file is opened as for "w" instead.
    compression: ZIP_STORED or ZIP_DEFLATED (the latter requires zlib).
    allowZip64: remembered on the instance as ``_allowZip64``.

    Raises RuntimeError for an unsupported mode or compression method.
    Any exception raised while the archive directory is being read is
    propagated after the file handle opened here (if any) is closed.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    else:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self.comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = io.open(file, modeDict[mode])
        except IOError:
            if mode == 'a':
                # Cannot open for update: fall back to creating the file.
                mode = key = 'w'
                self.fp = io.open(file, modeDict[mode])
            else:
                raise
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    try:
        if key == 'r':
            self._GetContents()
        elif key == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        elif key == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipFile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
        else:
            # Defensive only: the mode was validated at the top.
            raise RuntimeError('Mode must be "r", "w" or "a"')
    except BaseException:
        # Do not leak a handle this constructor opened.  _GetContents()
        # may already have closed it and reset self.fp to None.
        if not self._filePassed and self.fp is not None:
            self.fp.close()
            self.fp = None
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000744
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is the target."""
    archive = self
    return archive
747
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    The exception details are ignored and nothing is returned, so an
    exception raised inside the block keeps propagating.
    """
    self.close()
    return None
750
def _GetContents(self):
    """Read the directory, making sure we close the file if the format
    is bad.

    On BadZipFile the handle is closed and ``self.fp`` reset to None, but
    only when this object opened the file itself; the exception is then
    re-raised.
    """
    try:
        self._RealGetContents()
    except BadZipFile:
        opened_here = not self._filePassed
        if opened_here:
            self.fp.close()
            self.fp = None
        raise
761
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-archive record, reads the whole central directory
    into memory and appends one ZipInfo per record to ``self.filelist``
    and ``self.NameToInfo``.  Also sets ``self.comment`` and
    ``self.start_dir``.

    Raises BadZipFile when no end record is found, when a record has the
    wrong signature, or when the central directory is truncated.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except IOError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # From here on parse the in-memory copy of the directory.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if len(centdir) != sizeCentralDir:
            # A short read would otherwise surface as struct.error from
            # the unpack below rather than as a bad-archive error.
            raise BadZipFile("Truncated central directory")
        if centdir[0:4] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Field 5 is unpacked into x.flag_bits further down.
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
            x.flag_bits, x.compress_type, t, d,
            x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        # Shift by the size of any data preceding the archive.
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000833
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000834
def namelist(self):
    """Return a list of file names in the archive.

    A new list is built on every call, in ``self.filelist`` order.
    """
    return [entry.filename for entry in self.filelist]
841
def infolist(self):
    """Return a list of class ZipInfo instances for files in the
    archive.

    This is the archive's own ``filelist`` object, not a copy.
    """
    members = self.filelist
    return members
846
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header row is printed, then one row per entry of
    ``self.filelist`` with its name, modification time and size.
    `file` is passed straight through to print().
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000855
856 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000857 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000858 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000859 for zinfo in self.filelist:
860 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000861 # Read by chunks, to avoid an OverflowError or a
862 # MemoryError with very large embedded files.
863 f = self.open(zinfo.filename, "r")
864 while f.read(chunk_size): # Check CRC-32
865 pass
Georg Brandl4d540882010-10-28 06:42:33 +0000866 except BadZipFile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000867 return zinfo.filename
868
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when ``self.NameToInfo`` has no entry for `name`.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000877
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A false value (None, empty bytes) clears the default.  Raises
    TypeError when `pwd` is a true value that is not a bytes instance.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    self.pwd = pwd if pwd else None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000886
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    The member is opened in mode "r" with `pwd`, read in full, and the
    stream is closed before the data is returned.
    """
    with self.open(name, "r", pwd) as member:
        contents = member.read()
    return contents
Guido van Rossumd8faa362007-04-27 19:54:29 +0000891
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name: a member name, or a ZipInfo instance used as given.
    mode: "r", "U" or "rU".
    pwd: bytes password for an encrypted member; ``self.pwd`` is used
        when it is not given.

    Raises RuntimeError for a bad mode, a closed archive, a missing
    password or a failed password check; TypeError for a non-bytes
    password; KeyError (from getinfo) for an unknown name; BadZipFile
    when the local file header is truncated, has the wrong signature,
    or names a different file than the central directory.

    When the archive was opened from a path, a separate handle is opened
    for the member; it is closed here if anything fails before the
    ZipExtFile is returned.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            # A short read would otherwise surface as struct.error.
            raise BadZipFile("Truncated file header")
        if fheader[0:4] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = list(map(zd, header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd,
                          close_fileobj=not self._filePassed)
    except BaseException:
        # Single cleanup point: every failure path closes the handle that
        # was opened above, and never a caller-supplied file object.
        if not self._filePassed:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000979
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
    using its full name. Its file information is extracted as accurately
    as possible. `member' may be a filename or a ZipInfo object. You can
    specify a different directory using `path'.

    Returns whatever _extract_member() returns for the member.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
993
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
    directory. `path' specifies a different directory to extract to.
    `members' is optional and must be a subset of the list returned
    by namelist().

    Each selected member is handed to extract() in turn with the same
    `path` and `pwd`.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
1005
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
    file on the path targetpath.

    Missing parent directories are created.  A member whose name ends in
    '/' is created as a directory; anything else is copied out of the
    archive into a new file.  Returns the normalized path that was
    written.
    """
    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # don't include leading "/" from file name if present
    # NOTE(review): only a single leading "/" is removed; ".." components
    # in the member name are not filtered here -- confirm callers do not
    # rely on this for untrusted archives.
    if member.filename[0] == '/':
        targetpath = os.path.join(targetpath, member.filename[1:])
    else:
        targetpath = os.path.join(targetpath, member.filename)

    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    if member.filename[-1] == '/':
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    # Context managers close both streams even when opening the target
    # or copying the data fails part-way.
    with self.open(member, pwd=pwd) as source:
        with open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)

    return targetpath
1042
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    Prints a warning for a duplicate name when ``self.debug`` is set.
    Raises RuntimeError when the archive is not writable, already
    closed, or the compression method cannot be used, and LargeZipFile
    when a size or offset exceeds ZIP64_LIMIT while ZIP64 is disallowed.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("Duplicate name:", zinfo.filename)
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    method = zinfo.compress_type
    if method == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if method not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError("That compression method is not supported")
    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile(
            "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001065
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    arcname defaults to filename; its drive and leading separators are
    removed.  compress_type defaults to the archive's ``compression``.
    A directory is recorded as an empty entry whose name ends in '/'.
    Raises RuntimeError when the archive has already been closed.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    date_time = time.localtime(st.st_mtime)[0:6]

    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16    # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    with open(filename, "rb") as src:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = running_crc = 0
        zinfo.compress_size = packed_total = 0
        zinfo.file_size = raw_total = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None
        while True:
            chunk = src.read(1024 * 8)
            if not chunk:
                break
            raw_total += len(chunk)
            running_crc = crc32(chunk, running_crc) & 0xffffffff
            if cmpr:
                chunk = cmpr.compress(chunk)
                packed_total += len(chunk)
            self.fp.write(chunk)

    if cmpr:
        # Emit whatever the compressor is still holding.
        tail = cmpr.flush()
        packed_total += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed_total
    else:
        zinfo.compress_size = raw_total
    zinfo.CRC = running_crc
    zinfo.file_size = raw_total

    # Seek backwards and write CRC and file sizes
    resume_at = self.fp.tell()      # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(resume_at, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1146
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    A given compress_type overrides the entry's compression method.
    Raises RuntimeError when the archive has already been closed.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")

    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        # Build a fresh entry stamped with the current local time.
        stamp = time.localtime(time.time())[:6]
        zinfo = ZipInfo(filename=zinfo_or_arcname, date_time=stamp)
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)             # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header data
    if compress_type is not None:
        zinfo.compress_type = compress_type

    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum
    if zinfo.compress_type != ZIP_DEFLATED:
        zinfo.compress_size = zinfo.file_size
    else:
        packer = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                  zlib.DEFLATED, -15)
        data = packer.compress(data) + packer.flush()
        zinfo.compress_size = len(data)     # Compressed size
    zinfo.header_offset = self.fp.tell()    # Start of header data
    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        trailer = struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size)
        self.fp.write(trailer)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1192
def __del__(self):
    """Call the "close()" method in case the user forgot."""
    # Finalizer: simply delegates to close() and returns nothing.
    self.close()
    return None
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001196
1197 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001198 """Close the file, and for mode "w" and "a" write the ending
1199 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001200 if self.fp is None:
1201 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001202
1203 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001204 count = 0
1205 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001206 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001207 count = count + 1
1208 dt = zinfo.date_time
1209 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001210 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001211 extra = []
1212 if zinfo.file_size > ZIP64_LIMIT \
1213 or zinfo.compress_size > ZIP64_LIMIT:
1214 extra.append(zinfo.file_size)
1215 extra.append(zinfo.compress_size)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001216 file_size = 0xffffffff
1217 compress_size = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001218 else:
1219 file_size = zinfo.file_size
1220 compress_size = zinfo.compress_size
1221
1222 if zinfo.header_offset > ZIP64_LIMIT:
1223 extra.append(zinfo.header_offset)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001224 header_offset = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001225 else:
1226 header_offset = zinfo.header_offset
1227
1228 extra_data = zinfo.extra
1229 if extra:
1230 # Append a ZIP64 field to the extra's
1231 extra_data = struct.pack(
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001232 '<HH' + 'Q'*len(extra),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001233 1, 8*len(extra), *extra) + extra_data
1234
1235 extract_version = max(45, zinfo.extract_version)
1236 create_version = max(45, zinfo.create_version)
1237 else:
1238 extract_version = zinfo.extract_version
1239 create_version = zinfo.create_version
1240
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001241 try:
1242 filename, flag_bits = zinfo._encodeFilenameFlags()
1243 centdir = struct.pack(structCentralDir,
Ezio Melottiaf30d2e2009-09-25 21:35:24 +00001244 stringCentralDir, create_version,
1245 zinfo.create_system, extract_version, zinfo.reserved,
1246 flag_bits, zinfo.compress_type, dostime, dosdate,
1247 zinfo.CRC, compress_size, file_size,
1248 len(filename), len(extra_data), len(zinfo.comment),
1249 0, zinfo.internal_attr, zinfo.external_attr,
1250 header_offset)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001251 except DeprecationWarning:
Ezio Melottiaf30d2e2009-09-25 21:35:24 +00001252 print((structCentralDir, stringCentralDir, create_version,
1253 zinfo.create_system, extract_version, zinfo.reserved,
1254 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1255 zinfo.CRC, compress_size, file_size,
1256 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1257 0, zinfo.internal_attr, zinfo.external_attr,
1258 header_offset), file=sys.stderr)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001259 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001260 self.fp.write(centdir)
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001261 self.fp.write(filename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001262 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001263 self.fp.write(zinfo.comment)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001264
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001265 pos2 = self.fp.tell()
1266 # Write end-of-zip-archive record
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001267 centDirCount = count
1268 centDirSize = pos2 - pos1
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001269 centDirOffset = pos1
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001270 if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1271 centDirOffset > ZIP64_LIMIT or
1272 centDirSize > ZIP64_LIMIT):
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001273 # Need to write the ZIP64 end-of-archive records
1274 zip64endrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001275 structEndArchive64, stringEndArchive64,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001276 44, 45, 45, 0, 0, centDirCount, centDirCount,
1277 centDirSize, centDirOffset)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001278 self.fp.write(zip64endrec)
1279
1280 zip64locrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001281 structEndArchive64Locator,
1282 stringEndArchive64Locator, 0, pos2, 1)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001283 self.fp.write(zip64locrec)
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001284 centDirCount = min(centDirCount, 0xFFFF)
1285 centDirSize = min(centDirSize, 0xFFFFFFFF)
1286 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001287
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001288 # check for valid comment length
1289 if len(self.comment) >= ZIP_MAX_COMMENT:
1290 if self.debug > 0:
1291 msg = 'Archive comment is too long; truncating to %d bytes' \
1292 % ZIP_MAX_COMMENT
1293 self.comment = self.comment[:ZIP_MAX_COMMENT]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001294
Georg Brandl2ee470f2008-07-16 12:55:28 +00001295 endrec = struct.pack(structEndArchive, stringEndArchive,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001296 0, 0, centDirCount, centDirCount,
1297 centDirSize, centDirOffset, len(self.comment))
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001298 self.fp.write(endrec)
1299 self.fp.write(self.comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001300 self.fp.flush()
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001301
Fred Drake3d9091e2001-03-26 15:49:24 +00001302 if not self._filePassed:
1303 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001304 self.fp = None
1305
1306
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=False, optimize=-1):
        """Open the archive like ZipFile and remember the optimization level.

        file, mode, compression and allowZip64 are passed unchanged to
        ZipFile.__init__().

        optimize selects which compiled file writepy() archives:
          -1 (default)  legacy mode: use whichever up-to-date .pyo/.pyc
                        already exists, compiling only when none does;
          0             always archive the .pyc file;
          other values  always archive the .pyo file.
        In the last two cases the value is also handed to
        py_compile.compile() when the module has to be (re)compiled.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are normally module.pyo or module.pyc;
        the module is compiled first if necessary, and when compilation
        fails the module.py source itself is added instead.

        basename, when non-empty, is the archive directory under which
        the modules are stored.

        Raises RuntimeError if pathname is a file not ending in ".py".
        """
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                # __init__.py was just written; don't add it a second time.
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).
        If the module cannot be compiled, the .py file is returned
        instead.  A non-empty basename is prepended to the archive name
        as "basename/".
        """
        def _compile(file, optimize=-1):
            """Byte-compile file; return True on success, False on error."""
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                # Report the problem and let the caller fall back to the
                # .py source rather than aborting the whole archive.
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = imp.cache_from_source(file_py, True)
        pycache_pyo = imp.cache_from_source(file_py, False)

        def _is_current(candidate):
            """True if candidate exists and is at least as new as the source."""
            return (os.path.isfile(candidate) and
                    os.stat(candidate).st_mtime >= os.stat(file_py).st_mtime)

        if self._optimize == -1:
            # legacy mode: use whatever file is present
            if _is_current(file_pyo):
                # Use .pyo file.
                arcname = fname = file_pyo
            elif _is_current(file_pyc):
                # Use .pyc file.
                arcname = fname = file_pyc
            elif _is_current(pycache_pyc):
                # Use the __pycache__/*.pyc file, but write it to the legacy
                # pyc file name in the archive.
                fname = pycache_pyc
                arcname = file_pyc
            elif _is_current(pycache_pyo):
                # Use the __pycache__/*.pyo file, but write it to the legacy
                # pyo file name in the archive.
                fname = pycache_pyo
                arcname = file_pyo
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    fname = (pycache_pyc if __debug__ else pycache_pyo)
                    arcname = (file_pyc if __debug__ else file_pyo)
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_pyc
                arcname = file_pyc
            else:
                fname = pycache_pyo
                arcname = file_pyo
            if not _is_current(fname):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001449
1450
def main(args=None):
    """Command-line interface: list, test, extract or create a ZIP file.

    args is the argument list without the program name; when it is None,
    sys.argv[1:] is used.  The first argument selects the action:

      -l zipfile          print a listing of the archive
      -t zipfile          test the archive and report the first bad member
      -e zipfile target   extract every member below the directory target
      -c zipfile src ...  create the archive from files/directory trees

    If the arguments do not match one of these forms, the usage text is
    printed and the process exits with status 1.  The archive is closed
    even when an operation raises.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        out = args[2]
        try:
            for path in zf.namelist():
                # NOTE(review): member names are used as given; an absolute
                # name or one containing ".." makes os.path.join() produce a
                # target outside of `out`.  Only extract trusted archives.
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # Directory entry: create the directory; there is no
                    # file content to write (open() would fail on it).
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is '' when tgt has no directory part; makedirs('')
                # would raise, and nothing needs creating in that case.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            """Add file `path`, or recursively directory `path`, as zippath."""
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1524
# Run the command-line interface only when this file is executed as a
# script; importing the module does not call main().
if __name__ == "__main__":
    main()