blob: f81cc8b2eee78225ed82b75a7eb809137d8d649a [file] [log] [blame]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001"""
2Read and write ZIP files.
Guido van Rossumd6ca5462007-05-22 01:29:33 +00003
4XXX references to utf-8 need further investigation.
Thomas Wouters0e3f5912006-08-11 14:57:12 +00005"""
Antoine Pitroua32f9a22010-01-27 21:18:57 +00006import io
Barry Warsaw28a691b2010-04-17 00:19:56 +00007import os
Antoine Pitroua32f9a22010-01-27 21:18:57 +00008import re
Barry Warsaw28a691b2010-04-17 00:19:56 +00009import imp
10import sys
11import time
12import stat
13import shutil
14import struct
15import binascii
16
Guido van Rossum32abe6f2000-03-31 17:30:02 +000017
try:
    import zlib  # We may need its compression method
    crc32 = zlib.crc32
except ImportError:
    # zlib is optional.  Without it ``zlib`` is None (ZipFile refuses
    # ZIP_DEFLATED in that case) and the CRC-32 comes from binascii instead.
    zlib = None
    crc32 = binascii.crc32
Guido van Rossum32abe6f2000-03-31 17:30:02 +000024
# Public names exported by a star-import of this module.
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
Skip Montanaro40fc1602001-03-01 04:27:19 +000027
class BadZipfile(Exception):
    """Raised when an archive cannot be processed as a ZIP file.

    For example, it is raised for archives that span multiple disks.
    """


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """

error = BadZipfile      # The exception raised by this module
Guido van Rossum32abe6f2000-03-31 17:30:02 +000039
# Sizes above ZIP64_LIMIT cannot be stored in the classic header fields and
# make ZipInfo.FileHeader() fall back to the ZIP64 extension.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
141
def _check_zipfile(fp):
    """Return True if an "end of central directory" record is found in *fp*.

    *fp* is an open, seekable binary file object.  An IOError raised while
    looking for the record is treated as "not a ZIP file".
    """
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except IOError:
        pass
    return False
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000149
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    Anything with a ``read`` attribute is used as-is; otherwise the argument
    is opened in binary mode and closed again before returning.  Returns
    False (instead of raising) when the file cannot be opened or read.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except IOError:
        pass
    return result
165
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is the open archive, offset is the (negative) position of the
    classic "end of central directory" record relative to the end of the
    file, and endrec is the list built from that record.  endrec is updated
    in place and returned.  It is returned unchanged when no ZIP64 locator
    or ZIP64 record is present.

    Raises BadZipfile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive locator, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
197
198
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
        data[0:4] == stringEndArchive and
        data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec=list(endrec)

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record to follow it: not a valid end record.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000255
Fred Drake484d7352000-10-02 21:14:52 +0000256
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for *filename* with default header values.

        filename is stored unchanged in orig_filename; the filename
        attribute holds a normalized copy (cut at the first null byte,
        os.sep replaced by "/").  date_time is a
        (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a bytes object.

        The result is the packed local file header followed by the encoded
        file name and the extra field.  When either size exceeds ZIP64_LIMIT
        a ZIP64 extra record is appended and extract_version and
        create_version are raised to at least 45 as a side effect.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Each version field is raised independently; create_version must
            # not be overwritten with the extract version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        ASCII names are encoded as ASCII with the flag bits unchanged; any
        other name is encoded as UTF-8 and bit 0x800 is set in the returned
        flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 records found in self.extra to this entry.

        Records of type 1 replace file_size, compress_size and header_offset
        (in that order) whenever the attribute currently holds its
        "see ZIP64 record" placeholder value.  Other record types are
        skipped.  Raises RuntimeError for a type 1 record with an
        unsupported length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header; fewer
        # than 4 trailing bytes cannot be a record and are ignored.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx+=1

            extra = extra[ln+4:]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000395
396
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    The password and the data are handled as integers in range(256),
    i.e. what iterating over a bytes object yields.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = bytes(map(zd, cypher_text))
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = [0] * 256
        for i in range(256):
            crc = i
            for j in range(8):
                if crc & 1:
                    crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
                else:
                    crc = ((crc >> 1) & 0x7FFFFFFF)
            table[i] = crc
        return table
    # Built once, while the class body executes, and shared by all instances.
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]

    def __init__(self, pwd):
        """Initialize the three keys and mix in every byte of *pwd*."""
        self.key0 = 305419896
        self.key1 = 591751049
        self.key2 = 878082192
        for p in pwd:
            self._UpdateKeys(p)

    def _UpdateKeys(self, c):
        """Advance the key state with the plaintext byte *c* (an int)."""
        self.key0 = self._crc32(c, self.key0)
        self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
        self.key1 = (self.key1 * 134775813 + 1) & 4294967295
        self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        c = c ^ (((k * (k^1)) >> 8) & 255)
        self._UpdateKeys(c)
        return c
454
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # (Parenthesized: "1 << 31 - 1" would evaluate to 1 << 30.)
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None):
        """Wrap *fileobj*, positioned at the start of the member's data.

        mode is the open mode; a 'U' in it enables universal newlines for
        readline().  zipinfo supplies the compression type, compressed size
        and name.  decrypter, if given, is called on every byte read from
        fileobj.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        # Compressed bytes read from the file but not yet decompressed.
        self._unconsumed = b''

        # Decoded data; bytes before self._offset were already returned.
        self._readbuffer = b''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        In universal newlines mode every line ending is returned as b'\\n'
        and the endings seen so far are collected in self.newlines.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                # The buffer was rebuilt while reading, so the chunk can no
                # longer be "un-read" by rewinding the offset: put it back
                # in front of the unread data instead.
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: the object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        buf = b''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                # Only ask for what is still missing so that the result
                # never exceeds n bytes.
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        # (None is tested first: comparing None with an int raises TypeError.)
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = bytes(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            if len(self._unconsumed) == 0 and self._compress_left == 0:
                data += self._decompressor.flush()

            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data
Guido van Rossumd8faa362007-04-27 19:54:29 +0000620
621
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000622
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000623class ZipFile:
Tim Petersa19a1682001-03-29 04:36:09 +0000624 """ Class with methods to open, read, write, close, list zip files.
625
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000626 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
Tim Petersa19a1682001-03-29 04:36:09 +0000627
Fred Drake3d9091e2001-03-26 15:49:24 +0000628 file: Either the path to the file, or a file-like object.
629 If it is a path, the file will be opened and closed by ZipFile.
630 mode: The mode can be either read "r", write "w" or append "a".
631 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000632 allowZip64: if True ZipFile will create files with ZIP64 extensions when
633 needed, otherwise it will raise an exception when this would
634 be necessary.
635
Fred Drake3d9091e2001-03-26 15:49:24 +0000636 """
Fred Drake484d7352000-10-02 21:14:52 +0000637
Fred Drake90eac282001-02-28 05:29:34 +0000638 fp = None # Set here since __del__ checks it
639
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000640 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
Fred Drake484d7352000-10-02 21:14:52 +0000641 """Open the ZIP file with mode read "r", write "w" or append "a"."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000642 if mode not in ("r", "w", "a"):
643 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
644
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000645 if compression == ZIP_STORED:
646 pass
647 elif compression == ZIP_DEFLATED:
648 if not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +0000649 raise RuntimeError(
650 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000651 else:
Collin Winterce36ad82007-08-30 01:19:48 +0000652 raise RuntimeError("That compression method is not supported")
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000653
654 self._allowZip64 = allowZip64
655 self._didModify = False
Tim Peterse1190062001-01-15 03:34:38 +0000656 self.debug = 0 # Level of printing: 0 through 3
657 self.NameToInfo = {} # Find file info given name
658 self.filelist = [] # List of ZipInfo instances for archive
659 self.compression = compression # Method of compression
Raymond Hettinger2ca7c192005-02-16 09:27:49 +0000660 self.mode = key = mode.replace('b', '')[0]
Thomas Wouterscf297e42007-02-23 15:07:44 +0000661 self.pwd = None
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000662 self.comment = b''
Tim Petersa19a1682001-03-29 04:36:09 +0000663
Fred Drake3d9091e2001-03-26 15:49:24 +0000664 # Check if we were passed a file-like object
Guido van Rossum3172c5d2007-10-16 18:12:55 +0000665 if isinstance(file, str):
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000666 # No, it's a filename
Fred Drake3d9091e2001-03-26 15:49:24 +0000667 self._filePassed = 0
668 self.filename = file
669 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
Thomas Wouterscf297e42007-02-23 15:07:44 +0000670 try:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000671 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000672 except IOError:
673 if mode == 'a':
674 mode = key = 'w'
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000675 self.fp = io.open(file, modeDict[mode])
Thomas Wouterscf297e42007-02-23 15:07:44 +0000676 else:
677 raise
Fred Drake3d9091e2001-03-26 15:49:24 +0000678 else:
679 self._filePassed = 1
680 self.fp = file
681 self.filename = getattr(file, 'name', None)
Tim Petersa19a1682001-03-29 04:36:09 +0000682
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000683 if key == 'r':
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000684 self._GetContents()
685 elif key == 'w':
Fred Drake3d9091e2001-03-26 15:49:24 +0000686 pass
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000687 elif key == 'a':
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000688 try: # See if file is a zip file
689 self._RealGetContents()
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000690 # seek to start of directory and overwrite
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000691 self.fp.seek(self.start_dir, 0)
692 except BadZipfile: # file is not a zip file, just append
693 self.fp.seek(0, 2)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000694 else:
Tim Peters7d3bad62001-04-04 18:56:49 +0000695 if not self._filePassed:
696 self.fp.close()
697 self.fp = None
Collin Winterce36ad82007-08-30 01:19:48 +0000698 raise RuntimeError('Mode must be "r", "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000699
Ezio Melottifaa6b7f2009-12-30 12:34:59 +0000700 def __enter__(self):
701 return self
702
703 def __exit__(self, type, value, traceback):
704 self.close()
705
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000706 def _GetContents(self):
Tim Peters7d3bad62001-04-04 18:56:49 +0000707 """Read the directory, making sure we close the file if the format
708 is bad."""
709 try:
710 self._RealGetContents()
711 except BadZipfile:
712 if not self._filePassed:
713 self.fp.close()
714 self.fp = None
715 raise
716
717 def _RealGetContents(self):
Fred Drake484d7352000-10-02 21:14:52 +0000718 """Read in the table of contents for the ZIP file."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000719 fp = self.fp
Martin v. Löwis6f6873b2002-10-13 13:54:50 +0000720 endrec = _EndRecData(fp)
721 if not endrec:
Collin Winterce36ad82007-08-30 01:19:48 +0000722 raise BadZipfile("File is not a zip file")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000723 if self.debug > 1:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000724 print(endrec)
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000725 size_cd = endrec[_ECD_SIZE] # bytes in central directory
726 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
727 self.comment = endrec[_ECD_COMMENT] # archive comment
728
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000729 # "concat" is zero, unless zip was concatenated to another file
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000730 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
Antoine Pitrou9e4fdf42008-09-05 23:43:02 +0000731 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
732 # If Zip64 extension structures are present, account for them
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000733 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
734
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000735 if self.debug > 2:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000736 inferred = concat + offset_cd
737 print("given, inferred, offset", offset_cd, inferred, concat)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000738 # self.start_dir: Position of start of central directory
739 self.start_dir = offset_cd + concat
740 fp.seek(self.start_dir, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000741 data = fp.read(size_cd)
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000742 fp = io.BytesIO(data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000743 total = 0
744 while total < size_cd:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000745 centdir = fp.read(sizeCentralDir)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000746 if centdir[0:4] != stringCentralDir:
Collin Winterce36ad82007-08-30 01:19:48 +0000747 raise BadZipfile("Bad magic number for central directory")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000748 centdir = struct.unpack(structCentralDir, centdir)
749 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000750 print(centdir)
Fred Drake3e038e52001-02-28 17:56:26 +0000751 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000752 flags = centdir[5]
753 if flags & 0x800:
754 # UTF-8 file names extension
755 filename = filename.decode('utf-8')
756 else:
757 # Historical ZIP filename encoding
758 filename = filename.decode('cp437')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000759 # Create ZipInfo instance to store file information
Martin v. Löwis8570f6a2008-05-05 17:44:38 +0000760 x = ZipInfo(filename)
Fred Drake3e038e52001-02-28 17:56:26 +0000761 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
762 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000763 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000764 (x.create_version, x.create_system, x.extract_version, x.reserved,
765 x.flag_bits, x.compress_type, t, d,
766 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
767 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
768 # Convert date/time code to (year, month, day, hour, min, sec)
Christian Heimesfdab48e2008-01-20 09:06:41 +0000769 x._raw_time = t
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000770 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
Fred Drake414ca662000-06-13 18:49:53 +0000771 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000772
773 x._decodeExtra()
774 x.header_offset = x.header_offset + concat
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000775 self.filelist.append(x)
776 self.NameToInfo[x.filename] = x
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000777
778 # update total bytes read from central directory
779 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
780 + centdir[_CD_EXTRA_FIELD_LENGTH]
781 + centdir[_CD_COMMENT_LENGTH])
782
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000783 if self.debug > 2:
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000784 print("total", total)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000785
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000786
787 def namelist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000788 """Return a list of file names in the archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000789 l = []
790 for data in self.filelist:
791 l.append(data.filename)
792 return l
793
794 def infolist(self):
Fred Drake484d7352000-10-02 21:14:52 +0000795 """Return a list of class ZipInfo instances for files in the
796 archive."""
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000797 return self.filelist
798
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000799 def printdir(self, file=None):
Fred Drake484d7352000-10-02 21:14:52 +0000800 """Print a table of contents for the zip file."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000801 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
802 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000803 for zinfo in self.filelist:
Guido van Rossum7736b5b2008-01-15 21:44:53 +0000804 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000805 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
806 file=file)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000807
808 def testzip(self):
Fred Drake484d7352000-10-02 21:14:52 +0000809 """Read all the files and check the CRC."""
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000810 chunk_size = 2 ** 20
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000811 for zinfo in self.filelist:
812 try:
Benjamin Peterson4cd6a952008-08-17 20:23:46 +0000813 # Read by chunks, to avoid an OverflowError or a
814 # MemoryError with very large embedded files.
815 f = self.open(zinfo.filename, "r")
816 while f.read(chunk_size): # Check CRC-32
817 pass
Raymond Hettingerc0fac962003-06-27 22:25:03 +0000818 except BadZipfile:
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000819 return zinfo.filename
820
821 def getinfo(self, name):
Fred Drake484d7352000-10-02 21:14:52 +0000822 """Return the instance of ZipInfo given 'name'."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000823 info = self.NameToInfo.get(name)
824 if info is None:
825 raise KeyError(
826 'There is no item named %r in the archive' % name)
827
828 return info
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000829
Thomas Wouterscf297e42007-02-23 15:07:44 +0000830 def setpassword(self, pwd):
831 """Set default password for encrypted files."""
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000832 assert isinstance(pwd, bytes)
Thomas Wouterscf297e42007-02-23 15:07:44 +0000833 self.pwd = pwd
834
835 def read(self, name, pwd=None):
Fred Drake484d7352000-10-02 21:14:52 +0000836 """Return file bytes (as a string) for name."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000837 return self.open(name, "r", pwd).read()
838
839 def open(self, name, mode="r", pwd=None):
840 """Return file-like object for 'name'."""
841 if mode not in ("r", "U", "rU"):
Collin Winterce36ad82007-08-30 01:19:48 +0000842 raise RuntimeError('open() requires mode "r", "U", or "rU"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000843 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +0000844 raise RuntimeError(
845 "Attempt to read ZIP archive that was already closed")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000846
Guido van Rossumd8faa362007-04-27 19:54:29 +0000847 # Only open a new file for instances where we were not
848 # given a file object in the constructor
849 if self._filePassed:
850 zef_file = self.fp
851 else:
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000852 zef_file = io.open(self.filename, 'rb')
Guido van Rossumd8faa362007-04-27 19:54:29 +0000853
Georg Brandlb533e262008-05-25 18:19:30 +0000854 # Make sure we have an info object
855 if isinstance(name, ZipInfo):
856 # 'name' is already an info object
857 zinfo = name
858 else:
859 # Get info object for name
860 zinfo = self.getinfo(name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000861
862 zef_file.seek(zinfo.header_offset, 0)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000863
864 # Skip the file header:
Martin v. Löwisb09b8442008-07-03 14:13:42 +0000865 fheader = zef_file.read(sizeFileHeader)
Georg Brandl2ee470f2008-07-16 12:55:28 +0000866 if fheader[0:4] != stringFileHeader:
Collin Winterce36ad82007-08-30 01:19:48 +0000867 raise BadZipfile("Bad magic number for file header")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000868
869 fheader = struct.unpack(structFileHeader, fheader)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000870 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000871 if fheader[_FH_EXTRA_FIELD_LENGTH]:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000872 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000873
Guido van Rossumd6ca5462007-05-22 01:29:33 +0000874 if fname != zinfo.orig_filename.encode("utf-8"):
Collin Winterce36ad82007-08-30 01:19:48 +0000875 raise BadZipfile(
876 'File name in directory %r and header %r differ.'
877 % (zinfo.orig_filename, fname))
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000878
Guido van Rossumd8faa362007-04-27 19:54:29 +0000879 # check for encrypted flag & handle password
880 is_encrypted = zinfo.flag_bits & 0x1
881 zd = None
Thomas Wouterscf297e42007-02-23 15:07:44 +0000882 if is_encrypted:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000883 if not pwd:
884 pwd = self.pwd
885 if not pwd:
Collin Winterce36ad82007-08-30 01:19:48 +0000886 raise RuntimeError("File %s is encrypted, "
887 "password required for extraction" % name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000888
Thomas Wouterscf297e42007-02-23 15:07:44 +0000889 zd = _ZipDecrypter(pwd)
890 # The first 12 bytes in the cypher stream is an encryption header
891 # used to strengthen the algorithm. The first 11 bytes are
892 # completely random, while the 12th contains the MSB of the CRC,
Christian Heimesfdab48e2008-01-20 09:06:41 +0000893 # or the MSB of the file time depending on the header type
Thomas Wouterscf297e42007-02-23 15:07:44 +0000894 # and is used to check the correctness of the password.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000895 bytes = zef_file.read(12)
Guido van Rossumc1f779c2007-07-03 08:25:58 +0000896 h = list(map(zd, bytes[0:12]))
Christian Heimesfdab48e2008-01-20 09:06:41 +0000897 if zinfo.flag_bits & 0x8:
898 # compare against the file type from extended local headers
899 check_byte = (zinfo._raw_time >> 8) & 0xff
900 else:
901 # compare against the CRC otherwise
902 check_byte = (zinfo.CRC >> 24) & 0xff
903 if h[11] != check_byte:
904 raise RuntimeError("Bad password for file", name)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000905
Antoine Pitroua32f9a22010-01-27 21:18:57 +0000906 return ZipExtFile(zef_file, mode, zinfo, zd)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000907
Christian Heimes790c8232008-01-07 21:14:23 +0000908 def extract(self, member, path=None, pwd=None):
909 """Extract a member from the archive to the current working directory,
910 using its full name. Its file information is extracted as accurately
911 as possible. `member' may be a filename or a ZipInfo object. You can
912 specify a different directory using `path'.
913 """
914 if not isinstance(member, ZipInfo):
915 member = self.getinfo(member)
916
917 if path is None:
918 path = os.getcwd()
919
920 return self._extract_member(member, path, pwd)
921
922 def extractall(self, path=None, members=None, pwd=None):
923 """Extract all members from the archive to the current working
924 directory. `path' specifies a different directory to extract to.
925 `members' is optional and must be a subset of the list returned
926 by namelist().
927 """
928 if members is None:
929 members = self.namelist()
930
931 for zipinfo in members:
932 self.extract(zipinfo, path, pwd)
933
934 def _extract_member(self, member, targetpath, pwd):
935 """Extract the ZipInfo object 'member' to a physical
936 file on the path targetpath.
937 """
938 # build the destination pathname, replacing
939 # forward slashes to platform specific separators.
Antoine Pitrou3c33e082009-05-04 21:21:36 +0000940 # Strip trailing path separator, unless it represents the root.
941 if (targetpath[-1:] in (os.path.sep, os.path.altsep)
942 and len(os.path.splitdrive(targetpath)[1]) > 1):
Christian Heimes790c8232008-01-07 21:14:23 +0000943 targetpath = targetpath[:-1]
944
945 # don't include leading "/" from file name if present
Martin v. Löwis59e47792009-01-24 14:10:07 +0000946 if member.filename[0] == '/':
Christian Heimes790c8232008-01-07 21:14:23 +0000947 targetpath = os.path.join(targetpath, member.filename[1:])
948 else:
949 targetpath = os.path.join(targetpath, member.filename)
950
951 targetpath = os.path.normpath(targetpath)
952
953 # Create all upper directories if necessary.
954 upperdirs = os.path.dirname(targetpath)
955 if upperdirs and not os.path.exists(upperdirs):
956 os.makedirs(upperdirs)
957
Martin v. Löwis59e47792009-01-24 14:10:07 +0000958 if member.filename[-1] == '/':
Martin v. Löwis70ccd162009-05-24 19:47:22 +0000959 if not os.path.isdir(targetpath):
960 os.mkdir(targetpath)
Martin v. Löwis59e47792009-01-24 14:10:07 +0000961 return targetpath
962
Georg Brandlb533e262008-05-25 18:19:30 +0000963 source = self.open(member, pwd=pwd)
Christian Heimes790c8232008-01-07 21:14:23 +0000964 target = open(targetpath, "wb")
965 shutil.copyfileobj(source, target)
966 source.close()
967 target.close()
968
969 return targetpath
970
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000971 def _writecheck(self, zinfo):
Fred Drake484d7352000-10-02 21:14:52 +0000972 """Check for errors before writing a file to the archive."""
Raymond Hettinger54f02222002-06-01 14:18:47 +0000973 if zinfo.filename in self.NameToInfo:
Tim Peterse1190062001-01-15 03:34:38 +0000974 if self.debug: # Warning for duplicate names
Guido van Rossumbe19ed72007-02-09 05:37:30 +0000975 print("Duplicate name:", zinfo.filename)
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000976 if self.mode not in ("w", "a"):
Collin Winterce36ad82007-08-30 01:19:48 +0000977 raise RuntimeError('write() requires mode "w" or "a"')
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000978 if not self.fp:
Collin Winterce36ad82007-08-30 01:19:48 +0000979 raise RuntimeError(
980 "Attempt to write ZIP archive that was already closed")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000981 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
Collin Winterce36ad82007-08-30 01:19:48 +0000982 raise RuntimeError(
983 "Compression requires the (missing) zlib module")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000984 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
Collin Winterce36ad82007-08-30 01:19:48 +0000985 raise RuntimeError("That compression method is not supported")
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000986 if zinfo.file_size > ZIP64_LIMIT:
987 if not self._allowZip64:
988 raise LargeZipFile("Filesize would require ZIP64 extensions")
989 if zinfo.header_offset > ZIP64_LIMIT:
990 if not self._allowZip64:
Collin Winterce36ad82007-08-30 01:19:48 +0000991 raise LargeZipFile(
992 "Zipfile size would require ZIP64 extensions")
Guido van Rossum32abe6f2000-03-31 17:30:02 +0000993
994 def write(self, filename, arcname=None, compress_type=None):
Fred Drake484d7352000-10-02 21:14:52 +0000995 """Put the bytes from filename into the archive under the name
996 arcname."""
Guido van Rossumb5a755e2007-07-18 18:15:48 +0000997 if not self.fp:
998 raise RuntimeError(
999 "Attempt to write to ZIP archive that was already closed")
1000
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001001 st = os.stat(filename)
Martin v. Löwis59e47792009-01-24 14:10:07 +00001002 isdir = stat.S_ISDIR(st.st_mode)
Raymond Hettinger32200ae2002-06-01 19:51:15 +00001003 mtime = time.localtime(st.st_mtime)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001004 date_time = mtime[0:6]
1005 # Create ZipInfo instance to store file information
1006 if arcname is None:
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001007 arcname = filename
1008 arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1009 while arcname[0] in (os.sep, os.altsep):
1010 arcname = arcname[1:]
Martin v. Löwis59e47792009-01-24 14:10:07 +00001011 if isdir:
1012 arcname += '/'
Georg Brandl8f7c54e2006-02-20 08:40:38 +00001013 zinfo = ZipInfo(arcname, date_time)
Guido van Rossume2a383d2007-01-15 16:59:06 +00001014 zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001015 if compress_type is None:
Tim Peterse1190062001-01-15 03:34:38 +00001016 zinfo.compress_type = self.compression
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001017 else:
Tim Peterse1190062001-01-15 03:34:38 +00001018 zinfo.compress_type = compress_type
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001019
1020 zinfo.file_size = st.st_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001021 zinfo.flag_bits = 0x00
Tim Peterse1190062001-01-15 03:34:38 +00001022 zinfo.header_offset = self.fp.tell() # Start of header bytes
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001023
1024 self._writecheck(zinfo)
1025 self._didModify = True
Martin v. Löwis59e47792009-01-24 14:10:07 +00001026
1027 if isdir:
1028 zinfo.file_size = 0
1029 zinfo.compress_size = 0
1030 zinfo.CRC = 0
1031 self.filelist.append(zinfo)
1032 self.NameToInfo[zinfo.filename] = zinfo
1033 self.fp.write(zinfo.FileHeader())
1034 return
1035
Benjamin Petersonfa0d7032009-06-01 22:42:33 +00001036 with open(filename, "rb") as fp:
1037 # Must overwrite CRC and sizes with correct data later
1038 zinfo.CRC = CRC = 0
1039 zinfo.compress_size = compress_size = 0
1040 zinfo.file_size = file_size = 0
1041 self.fp.write(zinfo.FileHeader())
1042 if zinfo.compress_type == ZIP_DEFLATED:
1043 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1044 zlib.DEFLATED, -15)
1045 else:
1046 cmpr = None
1047 while 1:
1048 buf = fp.read(1024 * 8)
1049 if not buf:
1050 break
1051 file_size = file_size + len(buf)
1052 CRC = crc32(buf, CRC) & 0xffffffff
1053 if cmpr:
1054 buf = cmpr.compress(buf)
1055 compress_size = compress_size + len(buf)
1056 self.fp.write(buf)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001057 if cmpr:
1058 buf = cmpr.flush()
1059 compress_size = compress_size + len(buf)
1060 self.fp.write(buf)
1061 zinfo.compress_size = compress_size
1062 else:
1063 zinfo.compress_size = file_size
1064 zinfo.CRC = CRC
1065 zinfo.file_size = file_size
Finn Bock03a3bb82001-09-05 18:40:33 +00001066 # Seek backwards and write CRC and file sizes
Tim Petersb64bec32001-09-18 02:26:39 +00001067 position = self.fp.tell() # Preserve current position in file
Finn Bock03a3bb82001-09-05 18:40:33 +00001068 self.fp.seek(zinfo.header_offset + 14, 0)
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001069 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001070 zinfo.file_size))
Finn Bock03a3bb82001-09-05 18:40:33 +00001071 self.fp.seek(position, 0)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001072 self.filelist.append(zinfo)
1073 self.NameToInfo[zinfo.filename] = zinfo
1074
Ronald Oussorenee5c8852010-02-07 20:24:02 +00001075 def writestr(self, zinfo_or_arcname, data, compress_type=None):
Guido van Rossum85825dc2007-08-27 17:03:28 +00001076 """Write a file into the archive. The contents is 'data', which
1077 may be either a 'str' or a 'bytes' instance; if it is a 'str',
1078 it is encoded as UTF-8 first.
1079 'zinfo_or_arcname' is either a ZipInfo instance or
Just van Rossumb083cb32002-12-12 12:23:32 +00001080 the name of the file in the archive."""
Guido van Rossum85825dc2007-08-27 17:03:28 +00001081 if isinstance(data, str):
1082 data = data.encode("utf-8")
Just van Rossumb083cb32002-12-12 12:23:32 +00001083 if not isinstance(zinfo_or_arcname, ZipInfo):
1084 zinfo = ZipInfo(filename=zinfo_or_arcname,
Guido van Rossum7736b5b2008-01-15 21:44:53 +00001085 date_time=time.localtime(time.time())[:6])
Just van Rossumb083cb32002-12-12 12:23:32 +00001086 zinfo.compress_type = self.compression
Antoine Pitrou6e1df8d2008-07-25 19:58:18 +00001087 zinfo.external_attr = 0o600 << 16
Just van Rossumb083cb32002-12-12 12:23:32 +00001088 else:
1089 zinfo = zinfo_or_arcname
Guido van Rossumb5a755e2007-07-18 18:15:48 +00001090
1091 if not self.fp:
1092 raise RuntimeError(
1093 "Attempt to write to ZIP archive that was already closed")
1094
Guido van Rossum85825dc2007-08-27 17:03:28 +00001095 zinfo.file_size = len(data) # Uncompressed size
1096 zinfo.header_offset = self.fp.tell() # Start of header data
Ronald Oussorenee5c8852010-02-07 20:24:02 +00001097 if compress_type is not None:
1098 zinfo.compress_type = compress_type
1099
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001100 self._writecheck(zinfo)
1101 self._didModify = True
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001102 zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001103 if zinfo.compress_type == ZIP_DEFLATED:
1104 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1105 zlib.DEFLATED, -15)
Guido van Rossum85825dc2007-08-27 17:03:28 +00001106 data = co.compress(data) + co.flush()
1107 zinfo.compress_size = len(data) # Compressed size
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001108 else:
1109 zinfo.compress_size = zinfo.file_size
Guido van Rossum85825dc2007-08-27 17:03:28 +00001110 zinfo.header_offset = self.fp.tell() # Start of header data
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001111 self.fp.write(zinfo.FileHeader())
Guido van Rossum85825dc2007-08-27 17:03:28 +00001112 self.fp.write(data)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001113 self.fp.flush()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001114 if zinfo.flag_bits & 0x08:
Tim Peterse1190062001-01-15 03:34:38 +00001115 # Write CRC and file sizes after the file data
Gregory P. Smithe88749b2009-06-26 08:05:13 +00001116 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
Tim Peterse1190062001-01-15 03:34:38 +00001117 zinfo.file_size))
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001118 self.filelist.append(zinfo)
1119 self.NameToInfo[zinfo.filename] = zinfo
1120
1121 def __del__(self):
Fred Drake484d7352000-10-02 21:14:52 +00001122 """Call the "close()" method in case the user forgot."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001123 self.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001124
1125 def close(self):
Fred Drake484d7352000-10-02 21:14:52 +00001126 """Close the file, and for mode "w" and "a" write the ending
1127 records."""
Tim Petersd15f8bb2001-11-28 23:16:40 +00001128 if self.fp is None:
1129 return
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001130
1131 if self.mode in ("w", "a") and self._didModify: # write ending records
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001132 count = 0
1133 pos1 = self.fp.tell()
Tim Peterse1190062001-01-15 03:34:38 +00001134 for zinfo in self.filelist: # write central directory
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001135 count = count + 1
1136 dt = zinfo.date_time
1137 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
Tim Peters3caca232001-12-06 06:23:26 +00001138 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001139 extra = []
1140 if zinfo.file_size > ZIP64_LIMIT \
1141 or zinfo.compress_size > ZIP64_LIMIT:
1142 extra.append(zinfo.file_size)
1143 extra.append(zinfo.compress_size)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001144 file_size = 0xffffffff
1145 compress_size = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001146 else:
1147 file_size = zinfo.file_size
1148 compress_size = zinfo.compress_size
1149
1150 if zinfo.header_offset > ZIP64_LIMIT:
1151 extra.append(zinfo.header_offset)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001152 header_offset = 0xffffffff
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001153 else:
1154 header_offset = zinfo.header_offset
1155
1156 extra_data = zinfo.extra
1157 if extra:
1158 # Append a ZIP64 field to the extra's
1159 extra_data = struct.pack(
Christian Heimesd5e2b6f2008-03-19 21:50:51 +00001160 '<HH' + 'Q'*len(extra),
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001161 1, 8*len(extra), *extra) + extra_data
1162
1163 extract_version = max(45, zinfo.extract_version)
1164 create_version = max(45, zinfo.create_version)
1165 else:
1166 extract_version = zinfo.extract_version
1167 create_version = zinfo.create_version
1168
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001169 try:
1170 filename, flag_bits = zinfo._encodeFilenameFlags()
1171 centdir = struct.pack(structCentralDir,
Ezio Melottiaf30d2e2009-09-25 21:35:24 +00001172 stringCentralDir, create_version,
1173 zinfo.create_system, extract_version, zinfo.reserved,
1174 flag_bits, zinfo.compress_type, dostime, dosdate,
1175 zinfo.CRC, compress_size, file_size,
1176 len(filename), len(extra_data), len(zinfo.comment),
1177 0, zinfo.internal_attr, zinfo.external_attr,
1178 header_offset)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001179 except DeprecationWarning:
Ezio Melottiaf30d2e2009-09-25 21:35:24 +00001180 print((structCentralDir, stringCentralDir, create_version,
1181 zinfo.create_system, extract_version, zinfo.reserved,
1182 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
1183 zinfo.CRC, compress_size, file_size,
1184 len(zinfo.filename), len(extra_data), len(zinfo.comment),
1185 0, zinfo.internal_attr, zinfo.external_attr,
1186 header_offset), file=sys.stderr)
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001187 raise
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001188 self.fp.write(centdir)
Martin v. Löwis8570f6a2008-05-05 17:44:38 +00001189 self.fp.write(filename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001190 self.fp.write(extra_data)
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001191 self.fp.write(zinfo.comment)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001192
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001193 pos2 = self.fp.tell()
1194 # Write end-of-zip-archive record
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001195 centDirCount = count
1196 centDirSize = pos2 - pos1
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001197 centDirOffset = pos1
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001198 if (centDirCount >= ZIP_FILECOUNT_LIMIT or
1199 centDirOffset > ZIP64_LIMIT or
1200 centDirSize > ZIP64_LIMIT):
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001201 # Need to write the ZIP64 end-of-archive records
1202 zip64endrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001203 structEndArchive64, stringEndArchive64,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001204 44, 45, 45, 0, 0, centDirCount, centDirCount,
1205 centDirSize, centDirOffset)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001206 self.fp.write(zip64endrec)
1207
1208 zip64locrec = struct.pack(
Georg Brandl2ee470f2008-07-16 12:55:28 +00001209 structEndArchive64Locator,
1210 stringEndArchive64Locator, 0, pos2, 1)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001211 self.fp.write(zip64locrec)
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001212 centDirCount = min(centDirCount, 0xFFFF)
1213 centDirSize = min(centDirSize, 0xFFFFFFFF)
1214 centDirOffset = min(centDirOffset, 0xFFFFFFFF)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001215
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001216 # check for valid comment length
1217 if len(self.comment) >= ZIP_MAX_COMMENT:
1218 if self.debug > 0:
1219 msg = 'Archive comment is too long; truncating to %d bytes' \
1220 % ZIP_MAX_COMMENT
1221 self.comment = self.comment[:ZIP_MAX_COMMENT]
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001222
Georg Brandl2ee470f2008-07-16 12:55:28 +00001223 endrec = struct.pack(structEndArchive, stringEndArchive,
Amaury Forgeot d'Arc0c3f8a42009-01-17 16:42:26 +00001224 0, 0, centDirCount, centDirCount,
1225 centDirSize, centDirOffset, len(self.comment))
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001226 self.fp.write(endrec)
1227 self.fp.write(self.comment)
Guido van Rossumf85af612001-04-14 16:45:14 +00001228 self.fp.flush()
Martin v. Löwisb09b8442008-07-03 14:13:42 +00001229
Fred Drake3d9091e2001-03-26 15:49:24 +00001230 if not self._filePassed:
1231 self.fp.close()
Guido van Rossum32abe6f2000-03-31 17:30:02 +00001232 self.fp = None
1233
1234
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are normally module.pyo or module.pyc;
        this method will compile module.py if no up-to-date compiled
        file exists.  If compilation fails, the error message is printed
        and the module.py source file is archived instead.

        basename, when non-empty, is prepended (with a "/") to the
        archive names of everything that is added.

        Raises RuntimeError if pathname is a file whose name does not
        end with ".py".
        """
        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if not pathname.endswith(".py"):
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if os.path.isfile(initname):
            # This is a package directory, add it
            if basename:
                basename = "%s/%s" % (basename, name)
            else:
                basename = name
            if self.debug:
                print("Adding package in", pathname, "as", basename)
            fname, arcname = self._get_codename(initname[0:-3], basename)
            if self.debug:
                print("Adding", arcname)
            self.write(fname, arcname)
            dirlist = os.listdir(pathname)
            # __init__ was written above; do not add it a second time.
            dirlist.remove("__init__.py")
            # Add all *.py files and package subdirectories
            for filename in dirlist:
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if os.path.isdir(path):
                    if os.path.isfile(os.path.join(path, "__init__.py")):
                        # This is a package directory, add it
                        self.writepy(path, basename)  # Recursive call
                elif ext == ".py":
                    fname, arcname = self._get_codename(path[0:-3],
                                                        basename)
                    if self.debug:
                        print("Adding", arcname)
                    self.write(fname, arcname)
        else:
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if ext == ".py":
                    fname, arcname = self._get_codename(path[0:-3],
                                                        basename)
                    if self.debug:
                        print("Adding", arcname)
                    self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path (the path of the source file without
        its ".py" extension), return the path of the file to read and
        the name to store it under in the archive, compiling the source
        if no up-to-date compiled file exists.  For example, given
        /python/lib/string and an empty basename, return
        (/python/lib/string.pyc, string.pyc).  A non-empty basename is
        prepended to the archive name with a "/".

        If compilation fails with py_compile.PyCompileError, the error
        message is printed and the .py source file itself is returned.
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = imp.cache_from_source(file_py, True)
        pycache_pyo = imp.cache_from_source(file_py, False)

        def up_to_date(compiled):
            # The source is only stat()ed once a candidate is known to
            # exist, so a missing source does not fail here needlessly.
            return (os.path.isfile(compiled) and
                    os.stat(compiled).st_mtime >= os.stat(file_py).st_mtime)

        if up_to_date(file_pyo):
            # Use .pyo file.
            arcname = fname = file_pyo
        elif up_to_date(file_pyc):
            # Use .pyc file.
            arcname = fname = file_pyc
        elif up_to_date(pycache_pyc):
            # Use the __pycache__/*.pyc file, but write it to the legacy pyc
            # file name in the archive.
            fname = pycache_pyc
            arcname = file_pyc
        elif up_to_date(pycache_pyo):
            # Use the __pycache__/*.pyo file, but write it to the legacy pyo
            # file name in the archive.
            fname = pycache_pyo
            arcname = file_pyo
        else:
            # Compile py into PEP 3147 pyc file.
            import py_compile
            if self.debug:
                print("Compiling", file_py)
            try:
                py_compile.compile(file_py, doraise=True)
            except py_compile.PyCompileError as err:
                print(err.msg)
                # No compiled file is available: archive the source under
                # its own name.  Both names must be bound because arcname
                # is used below.
                fname = arcname = file_py
            else:
                fname = (pycache_pyc if __debug__ else pycache_pyo)
                arcname = (file_pyc if __debug__ else file_pyo)
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001352
1353
def main(args=None):
    """Run the command-line interface of this module.

    args is the list of command-line arguments (without the program
    name); when it is None, sys.argv[1:] is used.  The first argument
    selects the action: -l lists an archive, -t tests it, -e extracts
    it into a target directory and -c creates it from source paths.

    If the arguments do not match one of the usages, the usage text is
    printed and sys.exit(1) is called.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            # testzip() returns the name of the first bad member, if any;
            # report it instead of discarding the result.
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(
                badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            # NOTE(review): member names are joined onto the target
            # directory as-is; names that are absolute or contain ".."
            # are not filtered here.
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # Directory entry: create the directory rather than
                    # trying to open it as a file.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is empty when extracting into the current
                # directory; makedirs('') would fail.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively the contents of a directory.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            # Closing writes the central directory even if adding failed.
            zf.close()
1425
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()