blob: c47c3ccfa33fe5fa34df9f653df3e9b03c210b62 [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import imp
10import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
# zlib is optional.  When it can be imported it supplies both the DEFLATE
# implementation and the CRC-32 routine; otherwise ``zlib`` is left as None
# and binascii provides the checksum.
try:
    import zlib
except ImportError:
    zlib = None
    crc32 = binascii.crc32
else:
    crc32 = zlib.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024
# Public names exported by ``from <this module> import *``.
__all__ = [
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
    "LargeZipFile",
]
Skip Montanaro40fc1602001-03-01 04:27:19 +000027
class BadZipfile(Exception):
    """Error raised by this module for ZIP data it cannot process.

    Examples in this module: archives that span multiple disks and members
    whose CRC-32 does not match the recorded value.
    """


class LargeZipFile(Exception):
    """
    Raised while writing a zipfile when the archive would need the ZIP64
    extensions but those extensions are disabled.
    """


# Alias: the exception raised by this module.
error = BadZipfile
Guido van Rossum32abe6f2000-03-31 17:30:02 +000039
# Sizes above ZIP64_LIMIT make ZipInfo.FileHeader() fall back to the ZIP64
# extension.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
# (other ZIP compression methods are not supported)
ZIP_STORED = 0
ZIP_DEFLATED = 8

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# ---------------------------------------------------------------------------
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# ---------------------------------------------------------------------------
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

(
    _ECD_SIGNATURE,
    _ECD_DISK_NUMBER,
    _ECD_DISK_START,
    _ECD_ENTRIES_THIS_DISK,
    _ECD_ENTRIES_TOTAL,
    _ECD_SIZE,
    _ECD_OFFSET,
    _ECD_COMMENT_SIZE,
    # These last two indices are not part of the structure as defined in the
    # spec, but they are used internally by this module as a convenience
    _ECD_COMMENT,
    _ECD_LOCATION,
) = range(10)

# ---------------------------------------------------------------------------
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
# ---------------------------------------------------------------------------
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(
    _CD_SIGNATURE,
    _CD_CREATE_VERSION,
    _CD_CREATE_SYSTEM,
    _CD_EXTRACT_VERSION,
    _CD_EXTRACT_SYSTEM,
    _CD_FLAG_BITS,
    _CD_COMPRESS_TYPE,
    _CD_TIME,
    _CD_DATE,
    _CD_CRC,
    _CD_COMPRESSED_SIZE,
    _CD_UNCOMPRESSED_SIZE,
    _CD_FILENAME_LENGTH,
    _CD_EXTRA_FIELD_LENGTH,
    _CD_COMMENT_LENGTH,
    _CD_DISK_NUMBER_START,
    _CD_INTERNAL_FILE_ATTRIBUTES,
    _CD_EXTERNAL_FILE_ATTRIBUTES,
    _CD_LOCAL_HEADER_OFFSET,
) = range(19)

# ---------------------------------------------------------------------------
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# ---------------------------------------------------------------------------
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

(
    _FH_SIGNATURE,
    _FH_EXTRACT_VERSION,
    _FH_EXTRACT_SYSTEM,
    _FH_GENERAL_PURPOSE_FLAG_BITS,
    _FH_COMPRESSION_METHOD,
    _FH_LAST_MOD_TIME,
    _FH_LAST_MOD_DATE,
    _FH_CRC,
    _FH_COMPRESSED_SIZE,
    _FH_UNCOMPRESSED_SIZE,
    _FH_FILENAME_LENGTH,
    _FH_EXTRA_FIELD_LENGTH,
) = range(12)

# ---------------------------------------------------------------------------
# The "Zip64 end of central directory locator" structure, magic number, and
# size
# ---------------------------------------------------------------------------
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# ---------------------------------------------------------------------------
# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# ---------------------------------------------------------------------------
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

(
    _CD64_SIGNATURE,
    _CD64_DIRECTORY_RECSIZE,
    _CD64_CREATE_VERSION,
    _CD64_EXTRACT_VERSION,
    _CD64_DISK_NUMBER,
    _CD64_DISK_NUMBER_START,
    _CD64_NUMBER_ENTRIES_THIS_DISK,
    _CD64_NUMBER_ENTRIES_TOTAL,
    _CD64_DIRECTORY_SIZE,
    _CD64_OFFSET_START_CENTDIR,
) = range(10)
141
def _check_zipfile(fp):
    """Return True if an "end of central directory" record is found in fp.

    fp is an open file object handed to _EndRecData().  An IOError raised
    while probing the file is treated as "not a ZIP file".
    """
    try:
        record = _EndRecData(fp)
    except IOError:
        return False
    # A truthy record means the file has the correct magic number.
    return bool(record)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000149
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path, or a file or file-like object
    (anything with a ``read`` attribute).  Returns False when the file
    cannot be opened or read.
    """
    outcome = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and close it when done.
            with open(filename, "rb") as stream:
                outcome = _check_zipfile(stream)
        else:
            # Already a file-like object: inspect it directly.
            outcome = _check_zipfile(fp=filename)
    except IOError:
        pass
    return outcome
165
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin   -- seekable binary file object positioned anywhere
    offset -- position of the classic "end of central directory" record,
              expressed relative to the end of the file (seek whence 2)
    endrec -- list produced by _EndRecData(); modified in place

    Returns endrec.  It is returned unchanged when the file is too small to
    hold the ZIP64 records or when their signatures are not present.

    Raises BadZipfile if the locator describes a multi-disk archive.
    """
    locator_offset = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_offset, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive locator, so keep the record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there cannot be a complete locator here.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    try:
        fpin.seek(locator_offset - sizeEndCentDir64, 2)
    except IOError:
        return endrec
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
197
198
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    fpin is a seekable binary file object.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.
    The list is passed through _EndRecData64(), which may overwrite some
    fields with values from the ZIP64 records.  None is returned when no
    valid record can be found."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check protects struct.unpack() against a short read (for
    # example from a file object that clamps the seek on a tiny file).
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record to follow it.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000255
Fred Drake484d7352000-10-02 21:14:52 +0000256
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named filename with the given modification time.

        date_time is a (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file (local) header as bytes.

        The result is the packed fixed-size header followed by the encoded
        file name and the extra field.  When either size exceeds ZIP64_LIMIT
        a ZIP64 extra record is appended and, as a side effect,
        extract_version and create_version are each raised to at least 45.
        """
        dt = self.date_time
        # Pack the date and time into the 16-bit MS-DOS layout; seconds are
        # stored with two-second resolution.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own current value so that a
            # higher create_version is never lowered.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        ASCII is used when the name allows it; otherwise the name is encoded
        as UTF-8 and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Decode the extra field, applying any ZIP64 record (type 1) found.

        file_size, compress_size and header_offset are replaced, in that
        order, by the 64-bit values of the record when they currently hold
        the "see ZIP64 record" placeholder.  Records of other types are
        skipped.  Raises RuntimeError when a ZIP64 record has an unexpected
        length.
        """
        extra = self.extra
        unpack = struct.unpack
        while extra:
            # Each record starts with a 2-byte type and a 2-byte length.
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            # Advance past this record (4-byte header plus payload).
            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000395
396
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        entries = []
        for value in range(256):
            # Eight shift steps per byte; XOR in the polynomial whenever the
            # bit shifted out was set.
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= poly
            entries.append(value)
        return entries
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ch) & 0xff
        shifted = (crc >> 8) & 0xffffff
        return shifted ^ self.crctable[index]

    def __init__(self, pwd):
        # Fixed starting values of the three keys, then mix in every
        # element of the password in order.
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for element in pwd:
            self._UpdateKeys(element)

    def _UpdateKeys(self, c):
        """Advance the three keys using the value c."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        summed = (self.key1 + (self.key0 & 0xFF)) & mask32
        self.key1 = (summed * 0x08088405 + 1) & mask32
        top_byte = (self.key1 >> 24) & 0xFF
        self.key2 = self._crc32(top_byte, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        keystream = ((k * (k ^ 1)) >> 8) & 255
        plain = c ^ keystream
        # The keys are advanced with the decrypted value.
        self._UpdateKeys(plain)
        return plain
454
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses matter: without
    # them "1 << 31 - 1" is evaluated as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None):
        """Wrap fileobj, positioned at the start of a member's data.

        fileobj   -- binary file object the raw member bytes are read from
        mode      -- mode string; a 'U' in it enables universal newlines
        zipinfo   -- ZipInfo describing the member (compression type,
                     compressed size, name and, when present, CRC)
        decrypter -- optional callable mapping one encrypted byte value to
                     its decrypted value
        """
        self._fileobj = fileobj
        self._decrypter = decrypter

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        self._unconsumed = b''

        self._readbuffer = b''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            # Not enough buffered: read more, then rewind over what was read
            # so the position is unchanged.
            chunk = self.read(n)
            self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: this object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        # Test for None first: comparing None with an int raises TypeError.
        if n is None:
            n = -1

        buf = b''
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                # Ask only for what is still missing so that the result
                # never exceeds n bytes.
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC; verify it when eof is true.

        Raises BadZipfile if, at end of file, the running CRC differs from
        the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None is tested first because "None < 0" raises TypeError.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = bytes(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000639
640
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000641
class ZipFile:
    """Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.

    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Raises RuntimeError for an unknown mode or compression method (or
        when ZIP_DEFLATED is requested but zlib is missing), and BadZipfile
        when a file opened for reading is not a zip archive.
        """
        if mode not in ("r", "w", "a"):
            raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.mode = key = mode.replace('b', '')[0]
        self.pwd = None
        self.comment = b''

        # Check if we were passed a file-like object
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            try:
                self.fp = io.open(file, modeDict[mode])
            except IOError:
                if mode == 'a':
                    # The file could not be opened for update: create it.
                    mode = key = 'w'
                    self.fp = io.open(file, modeDict[mode])
                else:
                    raise
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        try:
            if key == 'r':
                self._GetContents()
            elif key == 'w':
                # Set the modified flag so the end records get written even
                # if no files are added; otherwise close() would leave an
                # empty file that is not a valid archive.
                self._didModify = True
            elif key == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir, 0)
                except BadZipfile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)
                    # A new archive is being started: make sure close()
                    # writes its end records even if nothing is added.
                    self._didModify = True
            else:
                raise RuntimeError('Mode must be "r", "w" or "a"')
        except BaseException:
            # Do not leak a file we opened ourselves when construction fails
            # for any reason (not just BadZipfile).
            fp, self.fp = self.fp, None
            if fp is not None and not self._filePassed:
                fp.close()
            raise

    def __enter__(self):
        """Support the "with" statement; return the archive itself."""
        return self

    def __exit__(self, type, value, traceback):
        """Close the archive when leaving the "with" block."""
        self.close()

    def _GetContents(self):
        """Read the directory, making sure we close the file if the format
        is bad."""
        try:
            self._RealGetContents()
        except BadZipfile:
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist and self.NameToInfo, and sets self.comment and
        self.start_dir.  Raises BadZipfile if the end-of-archive record or a
        central directory entry is missing, malformed or truncated.
        """
        fp = self.fp
        endrec = _EndRecData(fp)
        if not endrec:
            raise BadZipfile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self.comment = endrec[_ECD_COMMENT]     # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset", offset_cd, inferred, concat)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        # Parse the directory from memory rather than from the file.
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            if len(centdir) != sizeCentralDir:
                # struct.unpack() would raise struct.error on a short read.
                raise BadZipfile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            flags = centdir[5]
            if flags & 0x800:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
                x.flag_bits, x.compress_type, t, d,
                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                     t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            if self.debug > 2:
                print("total", total)

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self, file=None):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
              file=file)
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
                  file=file)

    def testzip(self):
        """Read all the files and check the CRC.

        Return the name of the first member that raises BadZipfile while
        being read, or None if every member reads cleanly.
        """
        chunk_size = 2 ** 20
        for zinfo in self.filelist:
            try:
                # Read by chunks, to avoid an OverflowError or a
                # MemoryError with very large embedded files.
                with self.open(zinfo.filename, "r") as f:
                    while f.read(chunk_size):     # Check CRC-32
                        pass
            except BadZipfile:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError if the archive has no member called 'name'.
        """
        info = self.NameToInfo.get(name)
        if info is None:
            raise KeyError(
                'There is no item named %r in the archive' % name)

        return info

    def setpassword(self, pwd):
        """Set default password for encrypted files.

        'pwd' must be a bytes object; a false value (None or b'') clears
        the default password.  Raises TypeError for any other type.
        """
        # Explicit check instead of "assert", which is stripped under -O.
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd))
        if pwd:
            self.pwd = pwd
        else:
            self.pwd = None

    def read(self, name, pwd=None):
        """Return file bytes (as a string) for name."""
        with self.open(name, "r", pwd) as fp:
            return fp.read()

    def open(self, name, mode="r", pwd=None):
        """Return file-like object for 'name'.

        'name' is a member name or a ZipInfo object, and 'pwd' overrides the
        default password for encrypted members.  Raises RuntimeError for a
        bad mode, a closed archive, or a missing or wrong password, KeyError
        for an unknown member, and BadZipfile for an inconsistent header.
        """
        if mode not in ("r", "U", "rU"):
            raise RuntimeError('open() requires mode "r", "U", or "rU"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to read ZIP archive that was already closed")

        # Only open a new file for instances where we were not
        # given a file object in the constructor
        if self._filePassed:
            zef_file = self.fp
        else:
            zef_file = io.open(self.filename, 'rb')

        try:
            # Make sure we have an info object
            if isinstance(name, ZipInfo):
                # 'name' is already an info object
                zinfo = name
            else:
                # Get info object for name
                zinfo = self.getinfo(name)

            zef_file.seek(zinfo.header_offset, 0)

            # Skip the file header:
            fheader = zef_file.read(sizeFileHeader)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")

            fheader = struct.unpack(structFileHeader, fheader)
            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
            if fheader[_FH_EXTRA_FIELD_LENGTH]:
                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

            # The header name may be stored as UTF-8 or in the historical
            # cp437 encoding; accept a match under either one so that
            # non-ASCII cp437 names are not rejected.
            orig_filename = zinfo.orig_filename
            if (fname != orig_filename.encode("utf-8")
                    and fname.decode("cp437") != orig_filename):
                raise BadZipfile(
                      'File name in directory %r and header %r differ.'
                      % (zinfo.orig_filename, fname))

            # check for encrypted flag & handle password
            is_encrypted = zinfo.flag_bits & 0x1
            zd = None
            if is_encrypted:
                if not pwd:
                    pwd = self.pwd
                if not pwd:
                    raise RuntimeError("File %s is encrypted, "
                                       "password required for extraction" % name)

                zd = _ZipDecrypter(pwd)
                # The first 12 bytes in the cypher stream is an encryption header
                #  used to strengthen the algorithm. The first 11 bytes are
                #  completely random, while the 12th contains the MSB of the CRC,
                #  or the MSB of the file time depending on the header type
                #  and is used to check the correctness of the password.
                header = zef_file.read(12)
                h = list(map(zd, header[0:12]))
                if zinfo.flag_bits & 0x8:
                    # compare against the file type from extended local headers
                    check_byte = (zinfo._raw_time >> 8) & 0xff
                else:
                    # compare against the CRC otherwise
                    check_byte = (zinfo.CRC >> 24) & 0xff
                if h[11] != check_byte:
                    raise RuntimeError("Bad password for file", name)

            return ZipExtFile(zef_file, mode, zinfo, zd)
        except BaseException:
            # Release the file we opened ourselves if anything went wrong.
            if not self._filePassed:
                zef_file.close()
            raise

    def extract(self, member, path=None, pwd=None):
        """Extract a member from the archive to the current working directory,
           using its full name. Its file information is extracted as accurately
           as possible. `member' may be a filename or a ZipInfo object. You can
           specify a different directory using `path'.
        """
        if not isinstance(member, ZipInfo):
            member = self.getinfo(member)

        if path is None:
            path = os.getcwd()

        return self._extract_member(member, path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all members from the archive to the current working
           directory. `path' specifies a different directory to extract to.
           `members' is optional and must be a subset of the list returned
           by namelist().
        """
        if members is None:
            members = self.namelist()

        for zipinfo in members:
            self.extract(zipinfo, path, pwd)

    def _extract_member(self, member, targetpath, pwd):
        """Extract the ZipInfo object 'member' to a physical
           file on the path targetpath.

           Return the path of the file or directory that was created.
        """
        # Strip trailing path separator, unless it represents the root.
        if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
            targetpath = targetpath[:-1]

        # Build the destination pathname, replacing forward slashes with
        # platform specific separators.
        arcname = member.filename.replace('/', os.path.sep)
        if os.path.altsep:
            arcname = arcname.replace(os.path.altsep, os.path.sep)
        # Member names come from the archive and cannot be trusted: treat
        # absolute names as relative, and drop the drive letter, empty
        # components, "." and "..", so that nothing can be written outside
        # of targetpath.
        arcname = os.path.splitdrive(arcname)[1]
        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
        arcname = os.path.sep.join(part for part in arcname.split(os.path.sep)
                                   if part not in invalid_path_parts)

        targetpath = os.path.join(targetpath, arcname)
        targetpath = os.path.normpath(targetpath)

        # Create all upper directories if necessary.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            os.makedirs(upperdirs)

        if member.filename.endswith('/'):
            # Directory entry: there is no data to copy.
            if not os.path.isdir(targetpath):
                os.mkdir(targetpath)
            return targetpath

        source = self.open(member, pwd=pwd)
        try:
            with open(targetpath, "wb") as target:
                shutil.copyfileobj(source, target)
        finally:
            source.close()

        return targetpath

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive."""
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name:", zinfo.filename)
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError("That compression method is not supported")
        if zinfo.file_size > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
        if zinfo.header_offset > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile(
                      "Zipfile size would require ZIP64 extensions")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        'compress_type' overrides the archive's default compression for
        this member.  Directories are stored as empty entries whose name
        ends with '/'.
        """
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write to ZIP archive that was already closed")

        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        # Guard against an empty name (e.g. filename "/"), which would
        # otherwise raise IndexError here.
        while arcname and arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        zinfo.flag_bits = 0x00
        zinfo.header_offset = self.fp.tell()    # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            self.fp.write(zinfo.FileHeader())
            return

        with open(filename, "rb") as fp:
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            zinfo.file_size = file_size = 0
            self.fp.write(zinfo.FileHeader())
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while True:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = crc32(buf, CRC) & 0xffffffff
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Seek backwards and write CRC and file sizes
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset + 14, 0)
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo_or_arcname, data, compress_type=None):
        """Write a file into the archive.  The contents is 'data', which
        may be either a 'str' or a 'bytes' instance; if it is a 'str',
        it is encoded as UTF-8 first.
        'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive.
        'compress_type', when given, overrides the compression method."""
        if isinstance(data, str):
            data = data.encode("utf-8")
        if not isinstance(zinfo_or_arcname, ZipInfo):
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time())[:6])
            zinfo.compress_type = self.compression
            zinfo.external_attr = 0o600 << 16
        else:
            zinfo = zinfo_or_arcname

        if not self.fp:
            raise RuntimeError(
                  "Attempt to write to ZIP archive that was already closed")

        zinfo.file_size = len(data)            # Uncompressed size
        zinfo.header_offset = self.fp.tell()    # Start of header data
        if compress_type is not None:
            zinfo.compress_type = compress_type

        self._writecheck(zinfo)
        self._didModify = True
        zinfo.CRC = crc32(data) & 0xffffffff       # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
            data = co.compress(data) + co.flush()
            zinfo.compress_size = len(data)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        self.fp.write(zinfo.FileHeader())
        self.fp.write(data)
        self.fp.flush()
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                  zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records."""
        if self.fp is None:
            return

        try:
            if self.mode in ("w", "a") and self._didModify:
                self._write_end_records()
        finally:
            # Always release the file, even if writing the records failed,
            # so that a later close() (e.g. from __del__) is a no-op.
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()

    def _write_end_records(self):
        """Write the central directory and end-of-archive records to fp."""
        count = 0
        pos1 = self.fp.tell()
        for zinfo in self.filelist:         # write central directory
            count = count + 1
            dt = zinfo.date_time
            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
            # Values too large for the fixed-size fields go into a ZIP64
            # extra field; the fixed field then holds 0xffffffff.
            extra = []
            if zinfo.file_size > ZIP64_LIMIT \
                    or zinfo.compress_size > ZIP64_LIMIT:
                extra.append(zinfo.file_size)
                extra.append(zinfo.compress_size)
                file_size = 0xffffffff
                compress_size = 0xffffffff
            else:
                file_size = zinfo.file_size
                compress_size = zinfo.compress_size

            if zinfo.header_offset > ZIP64_LIMIT:
                extra.append(zinfo.header_offset)
                header_offset = 0xffffffff
            else:
                header_offset = zinfo.header_offset

            extra_data = zinfo.extra
            if extra:
                # Append a ZIP64 field to the extra's
                extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                extract_version = max(45, zinfo.extract_version)
                create_version = max(45, zinfo.create_version)
            else:
                extract_version = zinfo.extract_version
                create_version = zinfo.create_version

            try:
                filename, flag_bits = zinfo._encodeFilenameFlags()
                centdir = struct.pack(structCentralDir,
                    stringCentralDir, create_version,
                    zinfo.create_system, extract_version, zinfo.reserved,
                    flag_bits, zinfo.compress_type, dostime, dosdate,
                    zinfo.CRC, compress_size, file_size,
                    len(filename), len(extra_data), len(zinfo.comment),
                    0, zinfo.internal_attr, zinfo.external_attr,
                    header_offset)
            except DeprecationWarning:
                # Dump the offending values to help diagnose the failure.
                print((structCentralDir, stringCentralDir, create_version,
                    zinfo.create_system, extract_version, zinfo.reserved,
                    zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                    zinfo.CRC, compress_size, file_size,
                    len(zinfo.filename), len(extra_data), len(zinfo.comment),
                    0, zinfo.internal_attr, zinfo.external_attr,
                    header_offset), file=sys.stderr)
                raise
            self.fp.write(centdir)
            self.fp.write(filename)
            self.fp.write(extra_data)
            self.fp.write(zinfo.comment)

        pos2 = self.fp.tell()
        # Write end-of-zip-archive record
        centDirCount = count
        centDirSize = pos2 - pos1
        centDirOffset = pos1
        if (centDirCount >= ZIP_FILECOUNT_LIMIT or
            centDirOffset > ZIP64_LIMIT or
            centDirSize > ZIP64_LIMIT):
            # Need to write the ZIP64 end-of-archive records
            zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
            self.fp.write(zip64endrec)

            zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
            self.fp.write(zip64locrec)
            # The classic end record can only hold the capped values.
            centDirCount = min(centDirCount, 0xFFFF)
            centDirSize = min(centDirSize, 0xFFFFFFFF)
            centDirOffset = min(centDirOffset, 0xFFFFFFFF)

        # check for valid comment length
        if len(self.comment) > ZIP_MAX_COMMENT:
            if self.debug > 0:
                msg = 'Archive comment is too long; truncating to %d bytes' \
                      % ZIP_MAX_COMMENT
                print(msg)
            self.comment = self.comment[:ZIP_MAX_COMMENT]

        endrec = struct.pack(structEndArchive, stringEndArchive,
                             0, 0, centDirCount, centDirCount,
                             centDirSize, centDirOffset, len(self.comment))
        self.fp.write(endrec)
        self.fp.write(self.comment)
        self.fp.flush()
1252
1253
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are normally module.pyo or module.pyc;
        this method will compile module.py into module.pyc if
        necessary.  If compilation fails, the error message is printed
        and the module.py source file is added instead.

        basename, when non-empty, is the archive directory prefix under
        which the modules are stored.

        Raises RuntimeError if pathname is a file whose name does not
        end with ".py".
        """
        name = os.path.basename(pathname)
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                # __init__.py was just added above; don't add it twice.
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    root, ext = os.path.splitext(filename)
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            if not pathname.endswith(".py"):
                raise RuntimeError(
                      'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).

        pathname is the module path without its ".py" extension.
        basename, when non-empty, is prepended (with a "/") to the
        archive name.  If the module has to be compiled and compilation
        fails, the compiler message is printed and the ".py" source file
        is returned as both the file to read and the archive member name.
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = imp.cache_from_source(file_py, True)
        pycache_pyo = imp.cache_from_source(file_py, False)

        def up_to_date(compiled):
            # A compiled file is usable only if it exists and is at least
            # as new as the source.  The source is stat'ed lazily, only
            # once a candidate has been found on disk.
            return (os.path.isfile(compiled) and
                    os.stat(compiled).st_mtime >= os.stat(file_py).st_mtime)

        if up_to_date(file_pyo):
            # Use .pyo file.
            arcname = fname = file_pyo
        elif up_to_date(file_pyc):
            # Use .pyc file.
            arcname = fname = file_pyc
        elif up_to_date(pycache_pyc):
            # Use the __pycache__/*.pyc file, but write it to the legacy pyc
            # file name in the archive.
            fname = pycache_pyc
            arcname = file_pyc
        elif up_to_date(pycache_pyo):
            # Use the __pycache__/*.pyo file, but write it to the legacy pyo
            # file name in the archive.
            fname = pycache_pyo
            arcname = file_pyo
        else:
            # Compile py into PEP 3147 pyc file.
            import py_compile
            if self.debug:
                print("Compiling", file_py)
            try:
                py_compile.compile(file_py, doraise=True)
            except py_compile.PyCompileError as err:
                print(err.msg)
                # Compilation failed: fall back to archiving the source
                # itself.  Both names must be bound, since arcname is used
                # below to derive the archive member name.
                fname = arcname = file_py
            else:
                fname = (pycache_pyc if __debug__ else pycache_pyo)
                arcname = (file_pyc if __debug__ else file_pyo)
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001371
1372
def main(args=None):
    """Run the zipfile command-line interface.

    args is the list of command-line arguments (without the program
    name); when None, sys.argv[1:] is used.  Supported invocations:

        -l zipfile.zip           list the archive contents
        -t zipfile.zip           test the archive for corrupted members
        -e zipfile.zip target    extract the archive into directory target
        -c zipfile.zip src ...   create an archive from files/directories

    On a missing/unknown option or a wrong number of arguments, the
    usage text is printed and SystemExit(1) is raised.  The archive is
    always closed before returning, even if an operation fails.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            # The archive was previously left open on this path.
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                # tgtdir is '' when extracting a top-level member into
                # the current directory; os.makedirs('') would fail.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                if path.endswith('/'):
                    # Directory entry: it has been created above and has
                    # no file contents to write.
                    continue
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Recursively add a file, or every entry below a directory,
            # storing it in the archive under zippath.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1446
# Run the command-line interface only when this module is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()