blob: bcdb2b8c9a52bdaaa2839ec6e1187135c324d34a [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import imp
10import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
18try:
Tim Peterse1190062001-01-15 03:34:38 +000019 import zlib # We may need its compression method
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000020 crc32 = zlib.crc32
Guido van Rossum9c673f32001-04-10 15:37:12 +000021except ImportError:
Guido van Rossum32abe6f2000-03-31 17:30:02 +000022 zlib = None
Christian Heimesd5e2b6f2008-03-19 21:50:51 +000023 crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024
# Names exported by "from zipfile import *".
__all__ = [
    "BadZipfile",
    "error",
    "ZIP_STORED",
    "ZIP_DEFLATED",
    "is_zipfile",
    "ZipInfo",
    "ZipFile",
    "PyZipFile",
    "LargeZipFile",
]
Skip Montanaro40fc1602001-03-01 04:27:19 +000027
class BadZipfile(Exception):
    """Error raised by this module for archives it cannot process."""


class LargeZipFile(Exception):
    """Raised when writing a zipfile that needs ZIP64 extensions.

    Signals that the zipfile requires ZIP64 extensions and those
    extensions are disabled.
    """


# "error" is the exception raised by this module (alias of BadZipfile).
error = BadZipfile
Guido van Rossum32abe6f2000-03-31 17:30:02 +000039
ZIP64_LIMIT = 0x7FFFFFFF            # (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 0x10000       # 1 << 16
ZIP_MAX_COMMENT = 0xFFFF            # (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED, ZIP_DEFLATED = 0, 8
47# Other ZIP compression methods not supported
48
Martin v. Löwisb09b8442008-07-03 14:13:42 +000049# Below are some formats and associated data for reading/writing headers using
50# the struct module. The names and structures of headers/records are those used
51# in the PKWARE description of the ZIP file format:
52# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
53# (URL valid as of January 2008)
Thomas Wouters0e3f5912006-08-11 14:57:12 +000054
Martin v. Löwisb09b8442008-07-03 14:13:42 +000055# The "end of central directory" structure, magic number, size, and indices
56# (section V.I in the format document)
stringEndArchive = b"PK\005\006"
structEndArchive = b"<4s4H2LH"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields in the unpacked record.  The last two indices
# (_ECD_COMMENT and _ECD_LOCATION) are not part of the structure as defined
# in the spec, but they are used internally by this module as a convenience.
(
    _ECD_SIGNATURE,
    _ECD_DISK_NUMBER,
    _ECD_DISK_START,
    _ECD_ENTRIES_THIS_DISK,
    _ECD_ENTRIES_TOTAL,
    _ECD_SIZE,
    _ECD_OFFSET,
    _ECD_COMMENT_SIZE,
    _ECD_COMMENT,
    _ECD_LOCATION,
) = range(10)
73
74# The "central directory" structure, magic number, size, and indices
75# of entries in the structure (section V.F in the format document)
stringCentralDir = b"PK\001\002"
structCentralDir = "<4s4B4HL2L5H2L"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(
    _CD_SIGNATURE,
    _CD_CREATE_VERSION,
    _CD_CREATE_SYSTEM,
    _CD_EXTRACT_VERSION,
    _CD_EXTRACT_SYSTEM,
    _CD_FLAG_BITS,
    _CD_COMPRESS_TYPE,
    _CD_TIME,
    _CD_DATE,
    _CD_CRC,
    _CD_COMPRESSED_SIZE,
    _CD_UNCOMPRESSED_SIZE,
    _CD_FILENAME_LENGTH,
    _CD_EXTRA_FIELD_LENGTH,
    _CD_COMMENT_LENGTH,
    _CD_DISK_NUMBER_START,
    _CD_INTERNAL_FILE_ATTRIBUTES,
    _CD_EXTERNAL_FILE_ATTRIBUTES,
    _CD_LOCAL_HEADER_OFFSET,
) = range(19)
100
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000101# The "local file header" structure, magic number, size, and indices
102# (section V.A in the format document)
stringFileHeader = b"PK\003\004"
structFileHeader = "<4s2B4HL2L2H"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields in the unpacked local file header.
(
    _FH_SIGNATURE,
    _FH_EXTRACT_VERSION,
    _FH_EXTRACT_SYSTEM,
    _FH_GENERAL_PURPOSE_FLAG_BITS,
    _FH_COMPRESSION_METHOD,
    _FH_LAST_MOD_TIME,
    _FH_LAST_MOD_DATE,
    _FH_CRC,
    _FH_COMPRESSED_SIZE,
    _FH_UNCOMPRESSED_SIZE,
    _FH_FILENAME_LENGTH,
    _FH_EXTRA_FIELD_LENGTH,
) = range(12)
119
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000120# The "Zip64 end of central directory locator" structure, magic number, and size
stringEndArchive64Locator = b"PK\x06\x07"
structEndArchive64Locator = "<4sLQL"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
stringEndArchive64 = b"PK\x06\x06"
structEndArchive64 = "<4sQ2H2L4Q"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

(
    _CD64_SIGNATURE,
    _CD64_DIRECTORY_RECSIZE,
    _CD64_CREATE_VERSION,
    _CD64_EXTRACT_VERSION,
    _CD64_DISK_NUMBER,
    _CD64_DISK_NUMBER_START,
    _CD64_NUMBER_ENTRIES_THIS_DISK,
    _CD64_NUMBER_ENTRIES_TOTAL,
    _CD64_DIRECTORY_SIZE,
    _CD64_OFFSET_START_CENTDIR,
) = range(10)
141
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    An IOError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        endrec = _EndRecData(fp)
    except IOError:
        return False
    # A non-empty record means the file has the correct magic number.
    return True if endrec else False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000149
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too; anything
    with a "read" attribute is probed directly instead of being opened.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and close it when done.
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except IOError:
        # Unreadable or missing files are simply "not a ZIP file".
        pass
    return verdict
165
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin is a seekable binary file object, offset is the (negative) position
    of the regular "end of central directory" record relative to the end of
    the file, and endrec is the list built from that record.

    Returns endrec, updated in place when a ZIP64 locator and record are
    found; otherwise endrec is returned unchanged.  Raises BadZipfile for
    archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive locator, so keep the record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator before the record.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    try:
        fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    except IOError:
        # Not enough room for the ZIP64 record itself.
        return endrec
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
197
198
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.
    None is returned when no valid record can be located.
    """

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length test guards struct.unpack against file objects that clamp
    # the seek and hand back fewer bytes than a full record.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The record is truncated, so it cannot be unpacked.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000255
Fred Drake484d7352000-10-02 21:14:52 +0000256
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for filename with the given modification time.

        date_time is a (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as bytes.

        The result is the packed local file header followed by the encoded
        file name and the extra data.  When either size exceeds ZIP64_LIMIT
        a ZIP64 extra record is appended and extract_version and
        create_version are each raised to at least 45.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value; deriving it from
            # extract_version could lower a larger create_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        ASCII names are returned with the flag bits unchanged; any other
        name is encoded as UTF-8 and flag bit 0x800 is set in the result.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Decode the extra field, applying any ZIP64 record (type 1).

        Size and offset attributes holding the 0xffffffff placeholder are
        replaced by the 64-bit values from the record.  Raises RuntimeError
        when a ZIP64 record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header; trailing
        # bytes too short to hold one are ignored rather than unpacked.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000395
396
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for byte in range(256):
            value = byte
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= poly
            table.append(value)
        return table
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ch) & 0xff
        shifted = (crc >> 8) & 0xffffff
        return shifted ^ self.crctable[index]

    def __init__(self, pwd):
        # Fixed starting key values, then mix in every password byte.
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for byte in pwd:
            self._UpdateKeys(byte)

    def _UpdateKeys(self, c):
        # Each key feeds the next, so the statement order matters here.
        self.key0 = self._crc32(c, self.key0)
        mixed = (self.key1 + (self.key0 & 0xFF)) & 0xFFFFFFFF
        self.key1 = (mixed * 0x08088405 + 1) & 0xFFFFFFFF
        self.key2 = self._crc32((self.key1 >> 24) & 0xFF, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        probe = self.key2 | 2
        keystream = ((probe * (probe ^ 1)) >> 8) & 0xFF
        plain = c ^ keystream
        # The keys advance with the decrypted value, not the input.
        self._UpdateKeys(plain)
        return plain
454
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses are required:
    # "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None):
        """Wrap fileobj for reading the member described by zipinfo.

        mode containing 'U' turns on universal newline handling in
        readline(); decrypter, when given, is applied to every byte read
        from fileobj.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        self._unconsumed = b''

        self._readbuffer = b''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            # Fill the buffer through read(), then rewind over what it took.
            chunk = self.read(n)
            self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: this object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        buf = b''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC and verify it once eof is set.

        Raises BadZipfile when the final CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call.

        A negative n or None means "as much as possible" (MAX_N bytes).
        """

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None has to be tested first: "None < 0" raises TypeError.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = bytes(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000642
643
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000644
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000645class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000646 """ Class with methods to open, read, write, close, list zip files.
647
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000648 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000649
Fred Drake3d9091e2001-03-26 15:49:24 +0000650 file: Either the path to the file, or a file-like object.
651 If it is a path, the file will be opened and closed by ZipFile.
652 mode: The mode can be either read "r", write "w" or append "a".
653 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000654 allowZip64: if True ZipFile will create files with ZIP64 extensions when
655 needed, otherwise it will raise an exception when this would
656 be necessary.
657
Fred Drake3d9091e2001-03-26 15:49:24 +0000658 """
Fred Drake484d7352000-10-02 21:14:52 +0000659
Fred Drake90eac282001-02-28 05:29:34 +0000660 fp = None # Set here since __del__ checks it
661
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file: path of the archive (a str), or an already opened file-like
          object.  Only a file opened here from a path is closed by
          this object.
    mode: "r", "w" or "a"; anything else raises RuntimeError.
    compression: ZIP_STORED or ZIP_DEFLATED.  RuntimeError is raised for
                 any other value, or for ZIP_DEFLATED when zlib is
                 missing.
    allowZip64: whether ZIP64 extensions may be written when needed.

    In mode "r" the central directory is read immediately, so BadZipfile
    may be raised from here.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
    else:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                  # Level of printing: 0 through 3
    self.NameToInfo = {}            # Find file info given name
    self.filelist = []              # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    # mode was validated above, so it is exactly "r", "w" or "a"
    self.mode = key = mode
    self.pwd = None
    self.comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = False
        self.filename = file
        modeDict = {'r': 'rb', 'w': 'wb', 'a': 'r+b'}
        try:
            self.fp = io.open(file, modeDict[mode])
        except IOError:
            if mode == 'a':
                # The file cannot be opened for update: start a new
                # archive instead.  self.mode deliberately stays "a".
                mode = key = 'w'
                self.fp = io.open(file, modeDict[mode])
            else:
                raise
    else:
        self._filePassed = True
        self.fp = file
        self.filename = getattr(file, 'name', None)

    if key == 'r':
        self._GetContents()
    elif key == 'w':
        # Mark the archive as modified so that close() writes the end
        # records even when no member is added; otherwise the result
        # would not be a valid (empty) ZIP file.
        self._didModify = True
    else:
        # key == 'a'
        try:
            # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:
            # file is not a zip file, just append
            self.fp.seek(0, 2)
            # Same reasoning as for mode "w": make sure the end records
            # get written even if nothing is added.
            self._didModify = True
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000721
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is returned."""
    archive = self
    return archive
724
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    The exception details are ignored and None is returned, so an
    exception raised inside the block keeps propagating.
    """
    self.close()
    return None
727
def _GetContents(self):
    """Load the table of contents via _RealGetContents().

    When the archive turns out to be malformed (BadZipfile), a file that
    this object opened itself is closed and forgotten before the
    exception is re-raised.  A file object handed in by the caller is
    left untouched.
    """
    try:
        self._RealGetContents()
    except BadZipfile:
        we_opened_it = not self._filePassed
        if we_opened_it:
            self.fp.close()
            self.fp = None
        raise
738
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Fills self.filelist and self.NameToInfo with one ZipInfo per central
    directory record, and sets self.comment and self.start_dir.

    Raises BadZipfile when no end-of-archive record is found, when a
    central directory record has a bad magic number, or when a record
    is truncated.
    """
    fp = self.fp
    endrec = _EndRecData(fp)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory instead of from the real file
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # Valid magic but not enough bytes: report a damaged archive
            # rather than letting struct.error escape.
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
            x.flag_bits, x.compress_type, t, d,
            x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

        x._decodeExtra()
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000807
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000808
def namelist(self):
    """Return the member names of the archive as a new list.

    The names appear in the same order as the entries of self.filelist.
    """
    return [entry.filename for entry in self.filelist]
815
def infolist(self):
    """Return the list of ZipInfo instances for the archive members.

    The object returned is self.filelist itself, not a copy.
    """
    entries = self.filelist
    return entries
820
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header line is followed by one line per member giving its name,
    modification time and size.  'file' is passed straight to print(),
    so None means standard output.
    """
    heading = ("File Name", "Modified ", "Size")
    print("%-46s %19s %12s" % heading, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = (entry.filename, stamp, entry.file_size)
        print("%-46s %s %12d" % row, file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000829
def testzip(self):
    """Read all the files and check the CRC.

    Return the name of the first member for which reading raises
    BadZipfile, or None when every member could be read completely.
    """
    chunk_size = 2 ** 20
    for zinfo in self.filelist:
        try:
            # Read by chunks, to avoid an OverflowError or a
            # MemoryError with very large embedded files.
            f = self.open(zinfo.filename, "r")
            try:
                while f.read(chunk_size):     # Check CRC-32
                    pass
            finally:
                # Close the member object whether or not the read
                # succeeded.
                f.close()
        except BadZipfile:
            return zinfo.filename
    return None
842
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError when the archive has no member of that name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError('There is no item named %r in the archive' % name)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000851
def setpassword(self, pwd):
    """Set default password for encrypted files.

    pwd must be a bytes object; a false value such as None or b''
    clears the default password.  Raises TypeError for any other type.
    """
    # Validate explicitly: an assert would vanish under "python -O".
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    if pwd:
        self.pwd = pwd
    else:
        self.pwd = None
856
def read(self, name, pwd=None):
    """Return the content of member 'name' as bytes.

    'name' and 'pwd' are passed on to open().  The member object
    returned by open() is closed before this method returns, also when
    reading fails.
    """
    member = self.open(name, "r", pwd)
    try:
        return member.read()
    finally:
        member.close()
860
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name: a member name or a ZipInfo instance.
    mode: "r", "U" or "rU".
    pwd:  password for an encrypted member; when not given, the default
          set on the archive (self.pwd) is used.

    Raises RuntimeError for a bad mode, a closed archive, or a missing
    or wrong password.  Raises KeyError (from getinfo) for an unknown
    name.  Raises BadZipfile when the local file header is damaged or
    disagrees with the central directory.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")
        if len(fheader) != sizeFileHeader:
            # Valid magic but short read: do not let struct.error escape
            raise BadZipfile("Truncated file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        # Decode the header name with the encoding announced by the
        # member's flag bits (bit 0x800 means UTF-8, otherwise cp437)
        # before comparing it with the stored name.
        if zinfo.flag_bits & 0x800:
            try:
                fname_str = fname.decode("utf-8")
            except UnicodeDecodeError:
                # Undecodable header name: treat as a mismatch
                fname_str = None
        else:
            fname_str = fname.decode("cp437")

        if fname_str != zinfo.orig_filename:
            raise BadZipfile(
                  'File name in directory %r and header %r differ.'
                  % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            enc_header = zef_file.read(12)
            h = list(map(zd, enc_header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd)
    except BaseException:
        # Do not leak the private file handle opened above; a file
        # object supplied by the caller is never closed here.
        if not self._filePassed:
            zef_file.close()
        raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000929
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the path it was written to.

    'member' may be a member name or a ZipInfo object; a name is
    resolved with getinfo().  The member is stored under its full name
    below 'path', or below the current working directory when 'path'
    is None.  'pwd' is passed on for encrypted members.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
943
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() on each of them.

    'members' defaults to everything in namelist(); when given it must
    be a subset of that list.  'path' and 'pwd' are passed unchanged to
    extract(), so a 'path' of None means the current working directory.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
955
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

    Returns the normalized path that was created.  A member whose name
    ends with "/" is created as a directory.  Missing parent
    directories are created first.  'pwd' is passed to open().
    """
    # NOTE(review): the member name is taken from the archive as is;
    # apart from one leading "/" nothing is filtered, so names that
    # contain ".." can resolve outside targetpath.  Callers should only
    # extract archives they trust.

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # don't include leading "/" from file name if present
    # (startswith/endswith also cope with an empty member name)
    if member.filename.startswith('/'):
        targetpath = os.path.join(targetpath, member.filename[1:])
    else:
        targetpath = os.path.join(targetpath, member.filename)

    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    if member.filename.endswith('/'):
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    source = self.open(member, pwd=pwd)
    try:
        # Both ends are closed even when the copy fails half way.
        with open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)
    finally:
        source.close()

    return targetpath
992
def _writecheck(self, zinfo):
    """Validate 'zinfo' and the archive state before a member is written.

    A duplicate member name is only reported (when self.debug is set),
    never rejected.  Raises RuntimeError when the archive is not in mode
    "w" or "a", is already closed, or the compression method is
    unavailable or unsupported.  Raises LargeZipFile when the member
    size or its header offset needs ZIP64 while that is not allowed.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("Duplicate name:", zinfo.filename)

    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to write ZIP archive that was already closed")

    if zinfo.compress_type == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
              "Compression requires the (missing) zlib module")
    if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
        raise RuntimeError("That compression method is not supported")

    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile(
              "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001015
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    filename: path of the file or directory to add.
    arcname: name to store it under; defaults to 'filename'.  A drive
             letter and leading path separators are removed, and "/" is
             appended for a directory.
    compress_type: overrides the archive's default compression for this
                   member when not None.

    Raises RuntimeError when the archive is already closed, plus
    whatever _writecheck() raises.
    """
    if not self.fp:
        raise RuntimeError(
              "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    # Guard against an arcname made only of separators (e.g. "/"),
    # which would otherwise end in an IndexError on the empty string.
    while arcname and arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st.st_mode & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # A directory entry has a header but no data
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    with open(filename, "rb") as fp:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        else:
            cmpr = None
        while True:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1096
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, the member gets the current local time,
    the archive's default compression and external attributes
    0o600 << 16.  'compress_type', when not None, overrides the
    compression of the member (also on a ZipInfo that was passed in).

    Raises RuntimeError when the archive is already closed, plus
    whatever _writecheck() raises.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise RuntimeError(
              "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)             # Uncompressed size
    # Nothing below moves the file position before the header is
    # written, so the offset only needs to be taken once.
    zinfo.header_offset = self.fp.tell()    # Start of header data
    if compress_type is not None:
        zinfo.compress_type = compress_type

    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
        data = co.compress(data) + co.flush()
        zinfo.compress_size = len(data)     # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1142
def __del__(self):
    """Finalizer: call close() in case the user forgot to."""
    self.close()
    return None
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001146
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing when the archive is already closed.  The central
    directory and end-of-archive records are only written when the
    archive was modified.  Whatever happens while writing them,
    self.fp is reset to None, and a file that this object opened itself
    is closed; a file object supplied by the caller is left open.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:
            # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                # Values that do not fit the 32-bit fields are moved to
                # a ZIP64 extra field and replaced by 0xffffffff.
                extra = []
                if (zinfo.file_size > ZIP64_LIMIT
                        or zinfo.compress_size > ZIP64_LIMIT):
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                            '<HH' + 'Q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Debugging aid: show the values that could not be
                    # packed, then let the exception continue.
                    print((structCentralDir, stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset), file=sys.stderr)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, centDirCount, centDirCount,
                        centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # The classic record can only hold the capped values
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    print('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Always release the file, even when writing the records failed,
        # so that a later close() or __del__ does not retry on a broken
        # archive.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1255
1256
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory (it contains __init__.py),
        add the package's __init__ module, every *.py module directly
        inside it and, recursively, every package subdirectory.  If
        pathname is a plain directory, add the *.py modules it directly
        contains at the current archive level.  Otherwise pathname must
        name a single *.py file, which is added on its own.

        basename is the archive directory under which the entries are
        stored; an empty string stores them at the top level.

        Modules are stored as module.pyc or module.pyo; module.py is
        compiled first when no up-to-date byte code is found.  If that
        compilation fails, the module.py source itself is stored.

        Raises RuntimeError if pathname is not a directory and does not
        end with ".py".
        """
        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # Single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if os.path.isfile(initname):
            # This is a package directory, add it
            if basename:
                basename = "%s/%s" % (basename, name)
            else:
                basename = name
            if self.debug:
                print("Adding package in", pathname, "as", basename)
            fname, arcname = self._get_codename(initname[0:-3], basename)
            if self.debug:
                print("Adding", arcname)
            self.write(fname, arcname)
            dirlist = os.listdir(pathname)
            # __init__ has just been written; do not add it twice.
            dirlist.remove("__init__.py")
            # Add all *.py files and package subdirectories
            for filename in dirlist:
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if os.path.isdir(path):
                    if os.path.isfile(os.path.join(path, "__init__.py")):
                        # This is a package directory, add it
                        self.writepy(path, basename)  # Recursive call
                elif ext == ".py":
                    fname, arcname = self._get_codename(path[0:-3],
                                                        basename)
                    if self.debug:
                        print("Adding", arcname)
                    self.write(fname, arcname)
        else:
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if ext == ".py":
                    fname, arcname = self._get_codename(path[0:-3],
                                                        basename)
                    if self.debug:
                        print("Adding", arcname)
                    self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path (a *.py path without its extension),
        return the file to read from disk and the name to store it
        under in the archive, compiling if necessary.  For example,
        given /python/lib/string, return
        (/python/lib/string.pyc, string.pyc); with a non-empty basename
        the archive name becomes "basename/string.pyc".

        If the source has to be compiled and compilation fails, the
        error message is printed and the *.py source itself is returned
        as both the file and the archive name.
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = imp.cache_from_source(file_py, True)
        pycache_pyo = imp.cache_from_source(file_py, False)

        # (file on disk, legacy name used to derive the archive name), in
        # order of preference.  __pycache__ files are stored under the
        # legacy pyc/pyo file name in the archive.
        candidates = (
            (file_pyo, file_pyo),
            (file_pyc, file_pyc),
            (pycache_pyc, file_pyc),
            (pycache_pyo, file_pyo),
        )
        for fname, arcname in candidates:
            # Use the first byte code file that is at least as new as
            # the source.
            if (os.path.isfile(fname) and
                os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
                break
        else:
            # Compile py into PEP 3147 pyc file.
            import py_compile
            if self.debug:
                print("Compiling", file_py)
            try:
                py_compile.compile(file_py, doraise=True)
            except py_compile.PyCompileError as err:
                print(err.msg)
                # No byte code is available: fall back to archiving the
                # source, and make sure arcname is set for the code below.
                fname = arcname = file_py
            else:
                fname = (pycache_pyc if __debug__ else pycache_pyo)
                arcname = (file_pyc if __debug__ else file_pyo)

        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001374
1375
def main(args=None):
    """Command-line interface: list, test, extract or create a ZIP file.

    args is the argument list to interpret; when it is None,
    sys.argv[1:] is used.  The first argument selects the action
    ('-l', '-t', '-e' or '-c'); see the usage text below for the
    remaining arguments.

    If the action is unknown or the number of arguments is wrong, the
    usage text is printed and sys.exit(1) is called.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            # Release the archive handle even if testing raised.
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                # NOTE(review): member names are joined onto the target
                # directory unchanged, so names that are absolute or
                # contain ".." can end up outside `out`; only extract
                # archives from a trusted source this way.
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                # tgtdir is empty when extracting a top-level member into
                # the current directory; there is nothing to create then.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                if path.endswith('/'):
                    # Directory entry: it has been created above and has
                    # no data to write.
                    continue
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively the contents of a directory,
            # under the archive name zippath.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1449
# Run the command-line interface (main() reads sys.argv[1:]) only when this
# module is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()